[med-svn] [soapdenovo2] 01/03: New upstream version 241+dfsg
Andreas Tille
tille at debian.org
Wed Sep 6 08:50:53 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository soapdenovo2.
commit 98dff73fb6124dad1d94ce7dd9ad236c6ae6499d
Author: Andreas Tille <tille at debian.org>
Date: Wed Sep 6 10:04:07 2017 +0200
New upstream version 241+dfsg
---
Makefile | 31 +-
README.md | 18 +-
VERSION | 2 +-
fusion/Makefile | 46 +
fusion/attachPEinfo.c | 634 ++
fusion/bundle.c | 517 ++
fusion/check.c | 69 +
fusion/connect.c | 194 +
fusion/darray.c | 60 +
fusion/fib.c | 691 ++
fusion/fibHeap.c | 77 +
fusion/finalFusion | Bin 0 -> 316190 bytes
fusion/hashFunction.c | 86 +
fusion/inc/check.h | 5 +
fusion/inc/darray.h | 23 +
fusion/inc/def.h | 299 +
fusion/inc/def2.h | 44 +
{standardPregraph => fusion}/inc/dfib.h | 47 +-
fusion/inc/dfibHeap.h | 43 +
fusion/inc/dfibpriv.h | 97 +
fusion/inc/extfunc.h | 209 +
fusion/inc/extfunc2.h | 7 +
fusion/inc/extvab.h | 92 +
fusion/inc/fib.h | 81 +
fusion/inc/fibHeap.h | 43 +
fusion/inc/fibpriv.h | 112 +
fusion/inc/general.h | 89 +
fusion/inc/global.h | 74 +
fusion/inc/newhash.h | 122 +
fusion/inc/nuc.h | 14 +
fusion/inc/stack.h | 35 +
fusion/inc/stdinc.h | 40 +
fusion/inc/types.h | 14 +
fusion/kmer.c | 145 +
fusion/lib.c | 438 +
fusion/loadGraph.c | 494 +
fusion/localAsm.c | 1976 ++++
fusion/main.c | 210 +
fusion/map.c | 42 +
fusion/mem_manager.c | 95 +
fusion/newhash.c | 630 ++
fusion/orderContig.c | 4426 +++++++++
fusion/output_scaffold.c | 76 +
fusion/potential.c | 268 +
fusion/prepare.c | 254 +
fusion/prlHashCtg.c | 387 +
fusion/prlRead2Ctg.c | 1090 +++
fusion/prlReadFillGap.c | 1234 +++
fusion/read2scaf.c | 294 +
fusion/readseq1by1.c | 591 ++
fusion/scaffold.c | 62 +
fusion/searchPath.c | 205 +
fusion/seq.c | 195 +
fusion/stack.c | 132 +
sparsePregraph/Makefile | 40 +-
sparsePregraph/build_edge.cpp | 2504 +++---
sparsePregraph/build_graph.cpp | 1124 +--
sparsePregraph/build_preArc.cpp | 2019 ++---
sparsePregraph/change.log | 24 -
sparsePregraph/convert_soapdenovo.cpp | 700 +-
sparsePregraph/global.cpp | 24 +-
sparsePregraph/inc/build_edge.h | 70 +-
sparsePregraph/inc/build_graph.h | 10 +-
sparsePregraph/inc/build_preArc.h | 152 +-
sparsePregraph/inc/convert_soapdenovo.h | 6 +-
sparsePregraph/inc/core.h | 164 +-
sparsePregraph/inc/faidx.h | 104 +-
sparsePregraph/inc/glf.h | 48 +-
sparsePregraph/inc/global.h | 24 +-
sparsePregraph/inc/io_func.h | 14 +-
sparsePregraph/inc/knetfile.h | 66 +-
sparsePregraph/inc/kstring.h | 66 +-
sparsePregraph/inc/multi_threads.h | 28 +-
sparsePregraph/inc/razf.h | 84 +-
sparsePregraph/inc/sam_header.h | 18 +-
sparsePregraph/inc/sam_view.h | 75 +-
sparsePregraph/inc/seq_util.h | 891 +-
sparsePregraph/inc/sparse_kmer.h | 447 +-
sparsePregraph/inc/stdinc.h | 2 +-
sparsePregraph/inc/xcurses.h | 1326 +--
sparsePregraph/inc/xcurses.h.gch | Bin 3154808 -> 0 bytes
sparsePregraph/io_func.cpp | 1565 ++--
sparsePregraph/main.cpp | 8 +-
sparsePregraph/multi_threads.cpp | 176 +-
sparsePregraph/pregraph_sparse.cpp | 1197 +--
standardPregraph/Makefile | 32 +-
standardPregraph/arc.c | 396 +-
standardPregraph/attachPEinfo.c | 1028 +--
standardPregraph/bubble.c | 3440 +++----
standardPregraph/check.c | 172 +-
standardPregraph/compactEdge.c | 182 +-
standardPregraph/concatenateEdge.c | 526 +-
standardPregraph/connect.c | 258 +-
standardPregraph/contig.c | 492 +-
standardPregraph/cutTipPreGraph.c | 1084 +--
standardPregraph/cutTip_graph.c | 810 +-
standardPregraph/cutTip_graph2.c | 770 +-
standardPregraph/darray.c | 88 +-
standardPregraph/dfib.c | 866 +-
standardPregraph/dfibHeap.c | 50 +-
standardPregraph/fib.c | 1002 +--
standardPregraph/fibHeap.c | 38 +-
standardPregraph/hashFunction.c | 190 +-
standardPregraph/inc/check.h | 8 +-
standardPregraph/inc/darray.h | 20 +-
standardPregraph/inc/def.h | 340 +-
standardPregraph/inc/def2.h | 34 +-
standardPregraph/inc/dfib.h | 4 +-
standardPregraph/inc/dfibHeap.h | 22 +-
standardPregraph/inc/dfibpriv.h | 34 +-
standardPregraph/inc/extfunc.h | 222 +-
standardPregraph/inc/extfunc2.h | 10 +-
standardPregraph/inc/extvab.h | 56 +-
standardPregraph/inc/faidx.h | 104 +-
standardPregraph/inc/fib.h | 10 +-
standardPregraph/inc/fibHeap.h | 22 +-
standardPregraph/inc/fibpriv.h | 58 +-
standardPregraph/inc/glf.h | 48 +-
standardPregraph/inc/global.h | 56 +-
standardPregraph/inc/kmerhash.h | 48 +-
standardPregraph/inc/knetfile.h | 66 +-
standardPregraph/inc/kstring.h | 66 +-
standardPregraph/inc/newhash.h | 68 +-
standardPregraph/inc/nuc.h | 2 +-
standardPregraph/inc/razf.h | 84 +-
standardPregraph/inc/sam_header.h | 18 +-
standardPregraph/inc/sam_view.h | 75 +-
standardPregraph/inc/stack.h | 36 +-
standardPregraph/inc/stdinc.h | 2 +-
standardPregraph/inc/types.h | 2 +-
standardPregraph/inc/xcurses.h | 1326 +--
standardPregraph/iterate.c | 4180 ++++-----
standardPregraph/kmer.c | 921 +-
standardPregraph/kmerhash.c | 678 +-
standardPregraph/lib.c | 1128 +--
standardPregraph/linearEdge.c | 598 +-
standardPregraph/loadGraph.c | 1042 +--
standardPregraph/loadPath.c | 522 +-
standardPregraph/loadPreGraph.c | 1014 +--
standardPregraph/localAsm.c | 3997 ++++-----
standardPregraph/main.c | 1008 ++-
standardPregraph/map.c | 276 +-
standardPregraph/mem_manager.c | 110 +-
standardPregraph/newhash.c | 844 +-
standardPregraph/node2edge.c | 1010 +--
standardPregraph/orderContig.c | 14376 ++++++++++++++++--------------
standardPregraph/output_contig.c | 691 +-
standardPregraph/output_pregraph.c | 128 +-
standardPregraph/output_scaffold.c | 120 +-
standardPregraph/pregraph.c | 309 +-
standardPregraph/prlHashCtg.c | 682 +-
standardPregraph/prlHashReads.c | 1905 ++--
standardPregraph/prlRead2Ctg.c | 2178 ++---
standardPregraph/prlRead2path.c | 2312 ++---
standardPregraph/prlReadFillGap.c | 3516 ++++----
standardPregraph/read2edge.c | 2757 +++---
standardPregraph/read2scaf.c | 526 +-
standardPregraph/readInterval.c | 46 +-
standardPregraph/readseq1by1.c | 2350 ++---
standardPregraph/scaffold.c | 343 +-
standardPregraph/searchPath.c | 416 +-
standardPregraph/seq.c | 204 +-
standardPregraph/splitReps.c | 868 +-
standardPregraph/stack.c | 236 +-
update.log | 91 -
165 files changed, 57227 insertions(+), 38555 deletions(-)
diff --git a/Makefile b/Makefile
index 8874681..8e8f80e 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,13 @@
+MAKEFLAGS += --no-print-directory
CC = g++
ifdef debug
CFLAGS= -O0 -g -fomit-frame-pointer
else
-CFLAGS= -O4 -fomit-frame-pointer
+CFLAGS= -O3 -fomit-frame-pointer
endif
-SUBDIRS = sparsePregraph standardPregraph
-PROG= SOAPdenovo-63mer SOAPdenovo-127mer
+SUBDIRS = sparsePregraph standardPregraph fusion
+PROG= SOAPdenovo-63mer SOAPdenovo-127mer SOAPdenovo-fusion
INCLUDES= -I./sparsePregraph/inc -I./standardPregraph/inc
LIBPATH= -L/lib64 -L/usr/lib64 -L./sparsePregraph/inc -L./standardPregraph/inc
@@ -23,15 +24,6 @@ EXTRA_FLAGS += -Wl,--hash-style=both
LIBS += -lbam -lrt
endif
-ifneq (,$(findstring Unix,$(shell uname)))
-EXTRA_FLAGS += -Wl,--hash-style=both
-LIBS += -lbam -lrt
-endif
-
-ifneq (,$(findstring Darwin,$(shell uname)))
-LIBS += -lbammac
-endif
-
ifneq (,$(findstring $(shell uname -m), x86_64))
CFLAGS += -m64
endif
@@ -45,7 +37,10 @@ CFLAGS += -mpowerpc64
endif
-all: SOAPdenovo-63mer SOAPdenovo-127mer
+all: SOAPdenovo-63mer SOAPdenovo-127mer SOAPdenovo-fusion
+
+SOAPdenovo-fusion:
+ @cd fusion;make;cp SOAPdenovo-fusion ../;cd ..;
ifdef debug
SOAPdenovo-63mer:
@@ -56,10 +51,6 @@ SOAPdenovo-127mer:
@cd sparsePregraph;make 127mer=1 debug=1;cd ..;
@cd standardPregraph;make 127mer=1 debug=1;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
-clean:
- @cd sparsePregraph;make clean;cd ..;
- @cd standardPregraph;make clean;cd ..;
- @rm SOAPdenovo-63mer SOAPdenovo-127mer -f
else
SOAPdenovo-63mer:
@cd sparsePregraph;make 63mer=1;cd ..;
@@ -69,8 +60,10 @@ SOAPdenovo-127mer:
@cd sparsePregraph;make 127mer=1;cd ..;
@cd standardPregraph;make 127mer=1;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
+endif
+
clean:
@cd sparsePregraph;make clean;cd ..;
@cd standardPregraph;make clean;cd ..;
- @rm SOAPdenovo-63mer SOAPdenovo-127mer -f
-endif
+ @cd fusion;make clean;cd ..;
+ @rm -f SOAPdenovo-63mer SOAPdenovo-127mer SOAPdenovo-fusion
diff --git a/README.md b/README.md
index e3801a6..f0458ab 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,22 @@
# Manual of SOAPdenovo2
-## What's next of SOAPdenovo2
+## About MEGAHIT
-MEGAHIT is the formal successor of SOAPdenovo2
+MEGAHIT works with single-cell sequencing data and metagenomics data. Compared to SOAPdenovo, it generates longer contigs and consumes less memory.
+
+To scaffold the contigs generated by MEGAHIT, please use SOAPdenovo-fusion. It is a preparation module that takes contigs as input and generates files that can then be used by SOAPdenovo's map and scaff modules.
+
+Reference:
MEGAHIT: An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph
-http://www.ncbi.nlm.nih.gov/pubmed/25609793
-https://github.com/voutcn/megahit
+
+<a href="http://www.ncbi.nlm.nih.gov/pubmed/25609793">Manuscript</a>
+
+<a href="https://github.com/voutcn/megahit">Github</a>
+
+## For Mac users
+
+Please use <a href="http://brew.sh">brew</a> to install SOAPdenovo. SOAPdenovo's package in Homebrew-science is managed by Shaun Jackman.
## Introduction
diff --git a/VERSION b/VERSION
index 080c272..8bd4c3a 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.04-r240
+2.04-r241
diff --git a/fusion/Makefile b/fusion/Makefile
new file mode 100755
index 0000000..6b1159a
--- /dev/null
+++ b/fusion/Makefile
@@ -0,0 +1,46 @@
+# Generated automatically from Makefile.in by configure.
+# Builds the SOAPdenovo-fusion executable from the C sources in this directory.
+SHELL = /bin/sh
+
+exec_prefix = .
+bindir = $(exec_prefix)/bin
+libdir =
+mandir =
+
+CC = gcc
+# -w silences every compiler warning for these sources.
+CCOPT = -O3 -fprefetch-loop-arrays -funroll-loops -fomit-frame-pointer -w
+LIBS = -lm -lpthread
+INCDIRS = -Iinc/
+CFLAGS = ${CCOPT} ${INCDIRS}
+
+# "all" and "clean" name actions, not files; declare them phony so a stray
+# file with either name cannot stop the rule from running.
+.PHONY: all clean
+
+all: SOAPdenovo-fusion
+SRCS1 = searchPath.c scaffold.c check.c seq.c bundle.c potential.c\
+        loadGraph.c mem_manager.c attachPEinfo.c newhash.c\
+        output_scaffold.c orderContig.c connect.c hashFunction.c\
+        readseq1by1.c fib.c fibHeap.c stack.c kmer.c prepare.c
+OBJS1 = searchPath.o scaffold.o check.o seq.o bundle.o potential.o\
+        loadGraph.o mem_manager.o attachPEinfo.o newhash.o\
+        output_scaffold.o orderContig.o connect.o hashFunction.o\
+        readseq1by1.o fib.o fibHeap.o stack.o kmer.o prepare.o
+
+SRCS2 = prlHashCtg.c prlRead2Ctg.c map.c localAsm.c\
+        lib.c darray.c prlReadFillGap.c read2scaf.c
+OBJS2 = prlHashCtg.o prlRead2Ctg.o map.o localAsm.o\
+        lib.o darray.o prlReadFillGap.o read2scaf.o
+
+
+SRCS3 = main.c
+OBJS3 = main.o
+
+# Suffix rule: compile each .c into its .o, printing a one-line progress note.
+.c.o :
+	@printf "Compiling $<... \r"
+	@$(CC) $(CFLAGS) -c $<
+
+# Link every object group into the final executable.
+SOAPdenovo-fusion: $(OBJS1) $(OBJS2) $(OBJS3)
+	@printf "Making $@... \r"
+	@$(CC) $(CCOPT) -o $@ $^ $(LIBS)
+	@printf "$@ compilation done.\n";
+
+# Remove the objects and the executable.
+clean:
+	@/bin/rm -f *.o SOAPdenovo-fusion
+	@printf "SOAPdenovo-fusion cleaning done. \n"
+
diff --git a/fusion/attachPEinfo.c b/fusion/attachPEinfo.c
new file mode 100644
index 0000000..32bd94e
--- /dev/null
+++ b/fusion/attachPEinfo.c
@@ -0,0 +1,634 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+#include "stack.h"
+
+#define CNBLOCKSIZE 10000
+#define GAPARRSIZE 256
+#define BIG_NEG -10000000
+#define BIG_POS 10000000
+static STACK *isStack;
+static int ignorePE1, ignorePE2, ignorePE3, ignorePE4, ignorePE5, static_flag;
+static int onsameCtgPE;
+static unsigned long long peSUM;
+
+//static boolean staticF;
+
+static int existCounter;
+
+int calcuIS(STACK *intStack, int *SD);
+
+
+/*
+ * qsort() comparator for PE_INFO records: orders them by ascending rank.
+ * Returns 1, 0 or -1 when a's rank is above, equal to or below b's rank.
+ */
+static int cmp_pe(const void *a, const void *b)
+{
+    PE_INFO *lhs = (PE_INFO *)a;
+    PE_INFO *rhs = (PE_INFO *)b;
+
+    if(lhs->rank == rhs->rank)
+        return 0;
+
+    return (lhs->rank > rhs->rank) ? 1 : -1;
+}
+
+/*
+ * Load the paired-end library table from "<infile>.peGrads" into pes[].
+ *
+ * The first line starting with 'g' supplies gradsCounter, n_solexa and
+ * maxReadLen. Each following line supplies insertS, PE_bound, rank and
+ * pair_num_cut for one grad. When every grad carries a rank >= 1 the table
+ * is sorted by rank. Otherwise ranks are derived in file order from the
+ * insert-size classes (<300, <800, <3000, <7000, larger): the rank grows
+ * each time a grad falls in a class whose lower bound the previous grad
+ * did not reach.
+ *
+ * If the file cannot be opened, gradsCounter is set to 0 and nothing is
+ * loaded. If the file holds fewer records than the header announces,
+ * gradsCounter is reduced to the number of records actually read.
+ */
+void loadPEgrads(char *infile)
+{
+    /* lower bounds of the insert-size classes that may open a new rank */
+    static const int classFloor[] = {300, 800, 3000, 7000};
+    FILE *fp;
+    char name[256], line[1024];
+    int i, k;
+    int lastRank = 0;
+    boolean rankSet = 1;
+
+    /* bounded formatting: a long prefix must not overrun name[] */
+    snprintf(name, sizeof(name), "%s.peGrads", infile);
+    fp = fopen(name, "r");
+
+    if(!fp)
+    {
+        printf("can not open file %s .\n", name);
+        gradsCounter = 0;
+        return;
+    }
+
+    while(fgets(line, sizeof(line), fp) != NULL)
+    {
+        if(line[0] == 'g')
+        {
+            /* the numbers start after a 10-character label */
+            sscanf(line + 10, "%d %lld %d", &gradsCounter, &n_solexa, &maxReadLen);
+            printf("[%s]reads statistic : %lld reads with max len %d in %d grads .\n", __FUNCTION__, n_solexa, maxReadLen, gradsCounter);
+            break;
+        }
+    }
+
+    alloc_pe_mem(gradsCounter);
+
+    for(i = 0; i < gradsCounter; i++)
+    {
+        if(fgets(line, sizeof(line), fp) == NULL)
+        {
+            /* short file: do not re-parse the previous line for the missing grads */
+            printf("[%s]%s ends after %d of %d grads .\n", __FUNCTION__, name, i, gradsCounter);
+            gradsCounter = i;
+            break;
+        }
+
+        pes[i].rank = 0;
+        sscanf(line, "%d %lld %d %d", &(pes[i].insertS), &(pes[i].PE_bound), &(pes[i].rank), &(pes[i].pair_num_cut));
+
+        if(pes[i].rank < 1)
+            rankSet = 0;
+    }
+
+    fclose(fp);
+
+    if(rankSet)
+    {
+        qsort(&pes[0], gradsCounter, sizeof(PE_INFO), cmp_pe);
+        return;
+    }
+
+    /* At least one grad has no explicit rank: derive all ranks from insert sizes. */
+    for(i = 0; i < gradsCounter; i++)
+    {
+        if(i == 0)
+        {
+            pes[i].rank = ++lastRank;
+            continue;
+        }
+
+        /* k = index of the highest class floor this grad reaches, -1 if below 300 */
+        for(k = 3; k >= 0; k--)
+        {
+            if(pes[i].insertS >= classFloor[k])
+                break;
+        }
+
+        /* a new rank starts only when the previous grad was below that floor */
+        if(k >= 0 && pes[i - 1].insertS < classFloor[k])
+            ++lastRank;
+
+        pes[i].rank = lastRank;
+    }
+}
+
+
+/*
+ * Add `weight` units of support with estimated gap `gap` to the connection
+ * from contig e1 to contig e2, creating the connection if none exists.
+ *
+ * Returns NULL when e2 is e1 itself or e1 is the twin of e2; otherwise the
+ * (new or updated) connection. `weight` is capped at 255, and the stored
+ * weights are never raised above 255. For an existing connection gapLen
+ * becomes the weighted mean of the old and new gaps, weighted by the
+ * inherited or non-inherited weight depending on `inherit`. A zero weight
+ * leaves an existing connection unchanged.
+ */
+CONNECT *add1Connect(unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit)
+{
+    if(e1 == e2 || e1 == getTwinCtg(e2))
+        return NULL;
+
+    CONNECT *cnt;
+    long long weighted;
+
+    if(weight > 255)
+        weight = 255;
+
+    cnt = getCntBetween(e1, e2);
+
+    if(!cnt)
+    {
+        /* first evidence for this pair: allocate and link at the head of e1's list */
+        newCntCounter++;
+        cnt = allocateCN(e2, gap);
+
+        if(cntLookupTable)
+            putCnt2LookupTable(e1, cnt);
+
+        cnt->weight = weight;
+
+        if(contig_array[e1].mask || contig_array[e2].mask)
+            cnt->mask = 1;
+
+        cnt->next = contig_array[e1].downwardConnect;
+        contig_array[e1].downwardConnect = cnt;
+
+        if(inherit)
+        {
+            cnt->weightNotInherit = 0;
+            cnt->inherit = 1;
+            cnt->maxSingleWeight = weight;
+        }
+        else
+        {
+            cnt->weightNotInherit = weight;
+        }
+
+        return cnt;
+    }
+
+    if(weight == 0)
+        return cnt;
+
+    existCounter++;
+
+    if(inherit)
+    {
+        weighted = cnt->weight * cnt->gapLen + gap * weight;
+        cnt->gapLen = weighted / (cnt->weight + weight);
+
+        /* first inherited evidence: remember the direct weight seen so far */
+        if(!cnt->inherit)
+            cnt->maxSingleWeight = cnt->weightNotInherit;
+
+        cnt->inherit = 1;
+
+        if(!(cnt->maxSingleWeight > weight))
+            cnt->maxSingleWeight = weight;
+    }
+    else
+    {
+        weighted = cnt->weightNotInherit * cnt->gapLen + gap * weight;
+        cnt->gapLen = weighted / (cnt->weightNotInherit + weight);
+
+        if(cnt->weightNotInherit + weight <= 255)
+            cnt->weightNotInherit += weight;
+        else if(cnt->weightNotInherit < 255)
+            cnt->weightNotInherit = 255;
+    }
+
+    if(cnt->weight + weight <= 255)
+        cnt->weight += weight;
+    else if(cnt->weight < 255)
+        cnt->weight = 255;
+
+    return cnt;
+}
+/*
+ * Like add1Connect for non-inherited evidence, but instead of averaging the
+ * gap immediately it appends `gap` once per unit of weight to the
+ * connection's PE[] sample array (GAPARRSIZE ints, allocated when the
+ * connection is created here).
+ *
+ * Returns NULL when e2 is e1 itself or e1 is the twin of e2; otherwise the
+ * (new or updated) connection. `weight` is capped at 255 and the stored
+ * weights are never raised above 255.
+ * NOTE(review): an existing connection is assumed to already own a PE[]
+ * array — confirm that no other code path creates connections seen here.
+ */
+CONNECT *add1AccuConnect(unsigned int e1, unsigned int e2, int gap, int weight)
+{
+    if(e1 == e2 || e1 == getTwinCtg(e2))
+        return NULL;
+
+    CONNECT *cnt;
+    int slot;
+
+    if(weight > 255)
+        weight = 255;
+
+    cnt = getCntBetween(e1, e2);
+
+    if(!cnt)
+    {
+        newCntCounter++;
+        cnt = allocateCN(e2, gap);
+
+        if(cntLookupTable)
+            putCnt2LookupTable(e1, cnt);
+
+        cnt->weight = weight;
+        cnt->PE = (int *)ckalloc(GAPARRSIZE * sizeof(int)); //newly added
+        fprintf(stderr, "creating array for PEs in a connection.\n");
+
+        slot = 0;
+
+        while(slot < weight)
+        {
+            cnt->PE[slot] = gap;
+            fprintf(stderr, "inputting a PE with estimated gap size %d\n", gap);
+            slot++;
+        }
+
+        if(contig_array[e1].mask || contig_array[e2].mask)
+            cnt->mask = 1;
+
+        /* link at the head of e1's downward connection list */
+        cnt->next = contig_array[e1].downwardConnect;
+        contig_array[e1].downwardConnect = cnt;
+        cnt->weightNotInherit = weight;
+        return cnt;
+    }
+
+    if(weight == 0)
+        return cnt;
+
+    existCounter++;
+
+    /* samples are appended starting at the old non-inherited weight */
+    slot = cnt->weightNotInherit;
+
+    if(cnt->weightNotInherit + weight <= 255)
+        cnt->weightNotInherit += weight;
+    else if(cnt->weightNotInherit < 255)
+        cnt->weightNotInherit = 255;
+
+    while(slot < cnt->weightNotInherit)
+    {
+        cnt->PE[slot] = gap;
+        fprintf(stderr, "inputting a PE with estimated gap size %d\n", gap);
+        slot++;
+    }
+
+    if(cnt->weight + weight <= 255)
+        cnt->weight += weight;
+    else if(cnt->weight < 255)
+        cnt->weight = 255;
+
+    return cnt;
+}
+/*
+ * Register one read pair: the first read lies on contig e1 at pre_pos, its
+ * mate on contig bal_e2 at pos, and the library insert size is insert_size.
+ *
+ * Returns
+ *   -1  both reads map to the same contig id (counted in ignorePE1);
+ *    2  the mate's twin is e1: the pair is used only for the insert-size
+ *       statistics (peSUM, onsameCtgPE, isStack);
+ *    0  the implied gap is below -(insert_size * close_threshold) or above
+ *       insert_size (counted in ignorePE2 / ignorePE3);
+ *    1  the pair was recorded on e1 -> e2 and on bal_e2 -> twin(e1).
+ */
+int attach1PE(unsigned int e1, int pre_pos, unsigned int bal_e2, int pos, int insert_size)
+{
+    unsigned int twinOfE1, e2;
+    int gap;
+
+    if(e1 == bal_e2)
+    {
+        ignorePE1++;
+        return -1; //orientation wrong
+    }
+
+    twinOfE1 = getTwinCtg(e1);
+    e2 = getTwinCtg(bal_e2);
+
+    if(e1 == e2)
+    {
+        int span = contig_array[e1].length + overlaplen - pre_pos - pos;
+
+        if(span <= 0)
+            return 2;
+
+        peSUM += span;
+        onsameCtgPE++;
+
+        /* only contigs longer than the insert size feed the estimate */
+        if((int)contig_array[e1].length > insert_size)
+        {
+            int *slot = (int *)stackPush(isStack);
+            *slot = span;
+        }
+
+        return 2;
+    }
+
+    gap = insert_size - overlaplen + pre_pos + pos - contig_array[e1].length - contig_array[e2].length;
+
+    if(gap < -(insert_size * close_threshold))
+    {
+        ignorePE2++;
+        return 0;
+    }
+
+    if(gap > insert_size)
+    {
+        ignorePE3++;
+        return 0;
+    }
+
+    add1AccuConnect(e1, e2, gap, 1);
+    add1AccuConnect(bal_e2, twinOfE1, gap, 1);
+    return 1;
+}
+
+/*
+ * Derive cnt->gapLen from the per-pair gap samples stored in cnt->PE, then
+ * release the sample array. `caller` is the function name printed in the
+ * diagnostics.
+ *
+ *   more than 8 samples : mean of the samples within 3 standard deviations
+ *                         of the mean (rejected samples count in ignorePE5)
+ *   3 to 8 samples      : mean after dropping one maximum and one minimum
+ *                         (ignorePE4 grows by 2)
+ *   1 or 2 samples      : plain mean
+ *
+ * A connection without a sample array, or with a non-positive sample count,
+ * keeps its current gapLen.
+ */
+static void estimateGapFromPEs(CONNECT *cnt, const char *caller)
+{
+    int weight = cnt->weightNotInherit;
+    int ii;
+
+    if(!cnt->PE)
+        return;
+
+    if(weight > 8) //delete values exceed 3*SD
+    {
+        long long total = 0, sqDev = 0, avg;
+        double bound;
+        int kept = 0, dropped = 0;
+
+        for(ii = 0; ii < weight; ii++)
+            total += cnt->PE[ii];
+
+        avg = total / weight;
+
+        for(ii = 0; ii < weight; ii++)
+            sqDev += (cnt->PE[ii] - avg) * (cnt->PE[ii] - avg);
+
+        bound = (sqrt((double)sqDev / (weight - 1))) * 3;
+        total = 0;
+
+        for(ii = 0; ii < weight; ii++)
+        {
+            /* the deviation is long long; abs() would truncate it to int */
+            long long dev = cnt->PE[ii] - avg;
+
+            if(dev < 0)
+                dev = -dev;
+
+            if(dev <= bound)
+            {
+                total += cnt->PE[ii];
+                kept++;
+            }
+            else
+            {
+                ignorePE5++;
+                dropped++;
+            }
+        }
+
+        if(kept == 0)
+        {
+            fprintf(stderr, "[%s]num=0 in removing exceed 3*SD(%.1f) avg(%lld)step", caller, bound, avg);
+
+            for(ii = 0; ii < weight; ii++)
+                fprintf(stderr, "%d\t", cnt->PE[ii]);
+
+            /* nothing survived the filter: use the unfiltered mean rather than divide by zero */
+            cnt->gapLen = avg;
+        }
+        else
+        {
+            cnt->gapLen = total / kept;
+        }
+
+        fprintf(stderr, "estimating contigs' gap by removing PE exceeding 3*SD ,removing %d PEs\n", dropped);
+    }
+    else if(weight > 2) //delete max and min value
+    {
+        int max = BIG_NEG, maxid = -1, min = BIG_POS, minid = -1;
+        int sum = 0;
+
+        for(ii = 0; ii < weight; ii++)
+        {
+            if(cnt->PE[ii] > max)
+            {
+                max = cnt->PE[ii];
+                maxid = ii;
+            }
+
+            /* "<=" picks the last minimum, so maxid != minid even when all samples are equal */
+            if(cnt->PE[ii] <= min)
+            {
+                min = cnt->PE[ii];
+                minid = ii;
+            }
+        }
+
+        for(ii = 0; ii < weight; ii++)
+        {
+            if(ii != maxid && ii != minid)
+                sum += cnt->PE[ii];
+        }
+
+        ignorePE4 += 2;
+        cnt->gapLen = sum / (weight - 2);
+        fprintf(stderr, "estimating contigs' gap by removing max&min PE ,with max&min %d %d\n",
+                cnt->PE[maxid], cnt->PE[minid]);
+    }
+    else if(weight > 0)
+    {
+        int sum = 0;
+
+        for(ii = 0; ii < weight; ii++)
+            sum += cnt->PE[ii];
+
+        cnt->gapLen = sum / weight;
+        fprintf(stderr, "weight too small , directly estimate gap size.\n");
+    }
+
+    free((void *)cnt->PE);
+    /* do not leave a dangling pointer behind: a later pass must not read or free it again */
+    cnt->PE = NULL;
+}
+
+/*
+ * Read the mapped reads of library `peGrad` from fp, attach every read pair
+ * to the contig graph and then estimate the gap length of every connection.
+ *
+ * fp     stream of "readno contigno pos" records
+ * peGrad index into pes[]; must be in [0, gradsCounter)
+ * line   caller-owned buffer of at least lineLen bytes. On entry it may hold
+ *        a record left over from a previous call (or be empty); on return it
+ *        holds the last line read.
+ *
+ * Returns the number of pairs for which attach1PE() returned 1, or 0 when
+ * peGrad is out of range.
+ */
+int connectByPE_grad(FILE *fp, int peGrad, char *line)
+{
+    fprintf(stderr, "[%s]entering this function.\n", __FUNCTION__);
+    long long pre_readno, readno, minno, maxno;
+    int pre_pos, pos, flag, PE, count = 0;
+    unsigned int pre_contigno, contigno, newIndex;
+    unsigned int i;
+
+    /* pes[] holds gradsCounter entries, so gradsCounter itself is out of range */
+    if(peGrad < 0 || peGrad >= gradsCounter)
+    {
+        printf("[%s]specified pe grad is out of bound .\n", __FUNCTION__);
+        return 0;
+    }
+
+    /* reads of this library are numbered (minno, maxno] */
+    maxno = pes[peGrad].PE_bound;
+
+    if(peGrad == 0)
+        minno = 0;
+    else
+        minno = pes[peGrad - 1].PE_bound;
+
+    onsameCtgPE = peSUM = 0;
+    PE = pes[peGrad].insertS;
+
+    /*
+     * NOTE(review): a leftover record supplies pre_contigno as read from the
+     * file, whereas the loop below stores index_array[contigno] — confirm
+     * this asymmetry is intended.
+     */
+    if(strlen(line)
+            && sscanf(line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos) == 3)
+    {
+        if(pre_readno <= minno)
+            pre_readno = -1;
+    }
+    else
+        pre_readno = -1;
+
+    ignorePE1 = ignorePE2 = ignorePE3 = ignorePE4 = ignorePE5 = 0;
+    static_flag = 1;
+    isStack = (STACK *)createStack(CNBLOCKSIZE, sizeof(int));
+
+    while(fgets(line, lineLen, fp) != NULL)
+    {
+        /* skip malformed records instead of reusing values from the previous line */
+        if(sscanf(line, "%lld %u %d", &readno, &contigno, &pos) != 3)
+            continue;
+
+        if(readno > maxno)
+            break;
+
+        if(readno <= minno)
+            continue;
+
+        newIndex = index_array[contigno];
+
+        if(isSameAsTwin(newIndex))
+            continue;
+
+        if(PE && (readno % 2 == 0) && (pre_readno == readno - 1)) // they are a pair of reads
+        {
+            flag = attach1PE(pre_contigno, pre_pos, newIndex, pos, PE);
+
+            if(flag == 1)
+                count++;
+        }
+
+        pre_readno = readno;
+        pre_contigno = newIndex;
+        pre_pos = pos;
+    }
+
+    printf("[%s]Finish loading all PEs in grad %d .\n", __FUNCTION__, peGrad);
+    printf("[%s]Calculating estimated gap size for all connections .\n", __FUNCTION__);
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        CONNECT *tmp = contig_array[i].downwardConnect;
+
+        while(tmp)
+        {
+            estimateGapFromPEs(tmp, __FUNCTION__);
+            tmp = tmp->next;
+        }
+    }
+
+    fprintf(stderr, "[%s]%d PEs of insert size %d loaded .\n", __FUNCTION__, count, PE);
+    fprintf(stderr, "[%s]PEs discarded:%d because of wrong orientation,%d too close,%d too far,\n", __FUNCTION__, ignorePE1, ignorePE2, ignorePE3);
+    fprintf(stderr, "[%s]%d deleted by removing max&min , %d not fall in 3*SD.\n", __FUNCTION__, ignorePE4, ignorePE5);
+    printf("[%s]%d PEs of insert size %d loaded .\n", __FUNCTION__, count, PE);
+    printf("[%s]PEs discarded :%d because of wrong orientation,%d too close,%d too far ,\n", __FUNCTION__, ignorePE1, ignorePE2, ignorePE3);
+    printf("[%s]%d deleted by removing max&min , %d not fall in 3*SD .\n", __FUNCTION__, ignorePE4, ignorePE5);
+
+    if(onsameCtgPE > 0)
+    {
+        int SD = 0;
+        int avg = calcuIS(isStack, &SD);
+        printf("[%s]%d PE attached on same contig with estimated insert size %d SD %d .\n", __FUNCTION__, onsameCtgPE, avg, SD);
+    }
+
+    freeStack(isStack);
+    return count;
+}
+
+
+/*
+ * Estimate the insert size from the int samples held in intStack.
+ *
+ * Returns 0 (and leaves *SD untouched) when fewer than 100 samples are
+ * available. Otherwise stores the truncated standard deviation of the
+ * samples in *SD and returns the mean of the samples lying strictly within
+ * 3 * *SD of the overall mean; when *SD is 0 the overall mean is returned.
+ *
+ * The stack is walked with stackBackup/stackPop/stackRecover; it is left in
+ * its popped state on return.
+ */
+int calcuIS(STACK *intStack, int *SD)
+{
+    long long sum = 0;
+    int avg = 0;
+    int *item;
+    int num = intStack->item_c;
+
+    if(num < 100)
+        return avg;
+
+    /* pass 1: overall mean */
+    stackBackup(intStack);
+
+    while((item = (int *)stackPop(intStack)) != NULL)
+        sum += *item;
+
+    stackRecover(intStack);
+    num = intStack->item_c;
+    avg = sum / num;
+
+    /* pass 2: standard deviation; square in long long so large outliers cannot overflow int */
+    sum = 0;
+    stackBackup(intStack);
+
+    while((item = (int *)stackPop(intStack)) != NULL)
+    {
+        long long dev = (long long) * item - avg;
+        sum += dev * dev;
+    }
+
+    *SD = sqrt(sum / (num - 1));
+
+    /* test the value, not the pointer: with a zero SD the filter below would reject every sample */
+    if(*SD == 0)
+        return avg;
+
+    /* pass 3: mean of the samples within 3 SD */
+    stackRecover(intStack);
+    sum = num = 0;
+
+    while((item = (int *)stackPop(intStack)) != NULL)
+        if(abs(*item - avg) < 3 * *SD)
+        {
+            sum += *item;
+            num++;
+        }
+
+    if(num > 0)
+        avg = sum / num;
+
+    return avg;
+}
+
+/*
+ * Return the index of ctg's twin contig: ctg shifted by (bal_edge - 1).
+ * NOTE(review): presumably bal_edge is 0, 1 or 2, giving ctg-1, ctg or ctg+1 — confirm.
+ */
+unsigned int getTwinCtg(unsigned int ctg)
+{
+    unsigned int twin = ctg + contig_array[ctg].bal_edge - 1;
+
+    return twin;
+}
+
+/* True when ctg's bal_edge is above 1, i.e. getTwinCtg(ctg) is greater than ctg. */
+boolean isSmallerThanTwin(unsigned int ctg)
+{
+    if(contig_array[ctg].bal_edge > 1)
+        return 1;
+
+    return 0;
+}
+
+/* True when ctg's bal_edge is below 1, i.e. getTwinCtg(ctg) is ctg - 1 or lower. */
+boolean isLargerThanTwin(unsigned int ctg)
+{
+    if(contig_array[ctg].bal_edge < 1)
+        return 1;
+
+    return 0;
+}
+
+/* True when ctg's bal_edge is exactly 1, i.e. getTwinCtg(ctg) is ctg itself. */
+boolean isSameAsTwin(unsigned int ctg)
+{
+    if(contig_array[ctg].bal_edge == 1)
+        return 1;
+
+    return 0;
+}
diff --git a/fusion/bundle.c b/fusion/bundle.c
new file mode 100644
index 0000000..391e1d4
--- /dev/null
+++ b/fusion/bundle.c
@@ -0,0 +1,517 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+#include "dfibHeap.h"
+#include "fibHeap.h"
+#include "darray.h"
+
+
+#define CNBLOCKSIZE 10000
+#define GAPARRSIZE 256
+#define BIG_NEG -10000000
+#define BIG_POS 10000000
+static STACK *isStack;
+static int onsameCtgPE;
+extern int calcuIS(STACK *intStack, int *SD);
+void outputBundle(FILE *fp, int insertS);
+
+/*
+ * Add `weight` units of support to the connection from contig e1 to contig
+ * e2, creating it (with gap estimate `gap`) if none exists. Unlike
+ * add1AccuConnect, no per-pair gap samples are stored and the gap of an
+ * existing connection is not updated here.
+ *
+ * Returns NULL when e2 is e1 itself or e1 is the twin of e2; otherwise the
+ * (new or updated) connection. `weight` is capped at 255 and the stored
+ * weights are never raised above 255.
+ */
+static CONNECT *bun1AccuConnect(unsigned int e1, unsigned int e2, int gap, int weight)
+{
+    if(e1 == e2 || e1 == getTwinCtg(e2))
+        return NULL;
+
+    CONNECT *cnt;
+
+    if(weight > 255)
+        weight = 255;
+
+    cnt = getCntBetween(e1, e2);
+
+    if(!cnt)
+    {
+        newCntCounter++;
+        cnt = allocateCN(e2, gap);
+
+        if(cntLookupTable)
+            putCnt2LookupTable(e1, cnt);
+
+        cnt->weight = weight;
+
+        if(contig_array[e1].mask || contig_array[e2].mask)
+            cnt->mask = 1;
+
+        /* link at the head of e1's downward connection list */
+        cnt->next = contig_array[e1].downwardConnect;
+        contig_array[e1].downwardConnect = cnt;
+        cnt->weightNotInherit = weight;
+        return cnt;
+    }
+
+    if(weight == 0)
+        return cnt;
+
+    if(cnt->weightNotInherit + weight <= 255)
+        cnt->weightNotInherit += weight;
+    else if(cnt->weightNotInherit < 255)
+        cnt->weightNotInherit = 255;
+
+    if(cnt->weight + weight <= 255)
+        cnt->weight += weight;
+    else if(cnt->weight < 255)
+        cnt->weight = 255;
+
+    return cnt;
+}
+
+/*
+ * Register one read pair for bundling: the first read lies on contig e1 at
+ * pre_pos, its mate on contig bal_e2 at pos, with library insert size
+ * insert_size. No gap-range filtering is applied.
+ *
+ * Returns
+ *   -1  both reads map to the same contig id;
+ *    2  the mate's twin is e1: the pair only feeds onsameCtgPE / isStack;
+ *    1  the pair was recorded on e1 -> e2 and on bal_e2 -> twin(e1).
+ */
+static int in1PE(unsigned int e1, int pre_pos, unsigned int bal_e2, int pos, int insert_size)
+{
+    unsigned int twinOfE1, e2;
+    int gap;
+
+    if(e1 == bal_e2)
+        return -1; //orientation wrong
+
+    twinOfE1 = getTwinCtg(e1);
+    e2 = getTwinCtg(bal_e2);
+
+    if(e1 == e2)
+    {
+        int span = contig_array[e1].length + overlaplen - pre_pos - pos;
+
+        if(span <= 0)
+            return 2;
+
+        onsameCtgPE++;
+
+        /* only contigs longer than the insert size feed the estimate */
+        if((int)contig_array[e1].length > insert_size)
+        {
+            int *slot = (int *)stackPush(isStack);
+            *slot = span;
+        }
+
+        return 2;
+    }
+
+    gap = insert_size - overlaplen + pre_pos + pos - contig_array[e1].length - contig_array[e2].length;
+    bun1AccuConnect(e1, e2, gap, 1);
+    bun1AccuConnect(bal_e2, twinOfE1, gap, 1);
+    return 1;
+}
+
+/*
+ * Read the mapped reads of library `peGrad` from fp and register every read
+ * pair through in1PE().
+ *
+ * fp     stream of "readno contigno pos" records
+ * peGrad index into pes[]; must be in [0, gradsCounter)
+ * line   caller-owned buffer of at least lineLen bytes. On entry it may hold
+ *        a record left over from a previous call (or be empty); on return it
+ *        holds the last line read.
+ *
+ * Returns the number of pairs for which in1PE() returned 1, or 0 when
+ * peGrad is out of range.
+ *
+ * A fresh isStack is created on every call and is not released here.
+ * NOTE(review): confirm that the caller frees isStack.
+ */
+static int inputPE(FILE *fp, int peGrad, char *line)
+{
+    long long pre_readno, readno, minno, maxno;
+    int pre_pos, pos, PE, count = 0;
+    unsigned int pre_contigno, contigno, newIndex;
+
+    /* pes[] holds gradsCounter entries, so gradsCounter itself is out of range */
+    if(peGrad < 0 || peGrad >= gradsCounter)
+    {
+        printf("[%s]specified pe grad is out of bound .\n", __FUNCTION__);
+        return 0;
+    }
+
+    /* reads of this library are numbered (minno, maxno] */
+    maxno = pes[peGrad].PE_bound;
+
+    if(peGrad == 0)
+        minno = 0;
+    else
+        minno = pes[peGrad - 1].PE_bound;
+
+    PE = pes[peGrad].insertS;
+
+    /*
+     * NOTE(review): a leftover record supplies pre_contigno as read from the
+     * file, whereas the loop below stores index_array[contigno] — confirm
+     * this asymmetry is intended.
+     */
+    if(strlen(line)
+            && sscanf(line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos) == 3)
+    {
+        if(pre_readno <= minno)
+            pre_readno = -1;
+    }
+    else
+        pre_readno = -1;
+
+    isStack = (STACK *)createStack(CNBLOCKSIZE, sizeof(int));
+
+    while(fgets(line, lineLen, fp) != NULL)
+    {
+        /* skip malformed records instead of reusing values from the previous line */
+        if(sscanf(line, "%lld %u %d", &readno, &contigno, &pos) != 3)
+            continue;
+
+        if(readno > maxno)
+            break;
+
+        if(readno <= minno)
+            continue;
+
+        newIndex = index_array[contigno];
+
+        if(isSameAsTwin(newIndex))
+            continue;
+
+        if(PE && (readno % 2 == 0) && (pre_readno == readno - 1)) // they are a pair of reads
+        {
+            if(in1PE(pre_contigno, pre_pos, newIndex, pos, PE) == 1)
+                count++;
+        }
+
+        pre_readno = readno;
+        pre_contigno = newIndex;
+        pre_pos = pos;
+    }
+
+    printf("[%s]Finish loading all PEs in grad %d .\n", __FUNCTION__, peGrad);
+    /* No per-connection gap estimation runs in this function; the message is kept as emitted. */
+    printf("[%s]Calculating estimated gap size for all connections .\n", __FUNCTION__);
+    fprintf(stderr, "[%s]%d PEs of insert size %d loaded .\n", __FUNCTION__, count, PE);
+    return count;
+}
+
+/*
+ * call_bundle - feed every paired-end grad of "<graphfile>.readOnContig"
+ * to inputPE(), release the per-connection PE arrays afterwards and call
+ * outputBundle() on the bundle file.  Always returns 0.
+ */
+int call_bundle()
+{
+    char name[256], *line;
+    FILE *fp, *linkF;
+    int i;
+    unsigned int j;
+
+    loadUpdatedEdges(graphfile);
+
+    /*
+     * The output name used to reach ckopen() uninitialized because the
+     * sprintf() that filled it had been commented out.
+     * NOTE(review): "<graphfile>.bundle" is taken from that disabled line;
+     * confirm it is the intended output path.
+     */
+    snprintf(name, sizeof(name), "%s.bundle", graphfile);
+    linkF = ckopen(name, "w");
+
+    if(!pes)
+        loadPEgrads(graphfile);
+
+    snprintf(name, sizeof(name), "%s.readOnContig", graphfile);
+    fp = ckopen(name, "r");
+
+    lineLen = 1024;
+    line = (char *)ckalloc(lineLen * sizeof(char));
+
+    /* read the first line of the file and discard it; inputPE() starts
+     * with an empty buffer */
+    fgets(line, lineLen, fp);
+    line[0] = '\0';
+
+    newCntCounter = 0;
+
+    for(i = 0; i < gradsCounter; i++)
+    {
+        CONNECT *tmp;
+
+        createCntMemManager();
+        createCntLookupTable();
+
+        /* the number of loaded PEs is not used here */
+        (void) inputPE(fp, i, line);
+
+        for(j = 1; j <= num_ctg; j++)
+        {
+            for(tmp = contig_array[j].downwardConnect; tmp; tmp = tmp->next)
+            {
+                free((void *)tmp->PE);
+                /* the CONNECT object itself is not released here, so do
+                 * not leave a dangling pointer behind */
+                tmp->PE = NULL;
+            }
+
+            contig_array[j].downwardConnect = NULL;
+        }
+    }
+
+    /*
+     * linkF used to be closed inside the loop above (once per grad) and
+     * was then handed to outputBundle(); it is now closed exactly once,
+     * after its last use.
+     * NOTE(review): every downwardConnect list has been reset to NULL by
+     * the loop, so outputBundle() has nothing to visit whenever at least
+     * one grad was processed -- confirm this is intended.
+     */
+    outputBundle(linkF, 1);
+    fclose(linkF);
+
+    destroyConnectMem();
+    deleteCntLookupTable();
+
+    free((void *)line);
+    fclose(fp);
+    printf("[%s]all PEs attached\n", __FUNCTION__);
+
+    return 0;
+}
+
+/*
+ * outputBundle - walk the downward connections of every contig and, for
+ * each one whose weightNotInherit exceeds bund_threshold, write
+ * "<from>\t<to>\t<gapLen>" to fp when gapLen is negative.  The visited
+ * connection and the connection between the two twin contigs (if any)
+ * get their weightNotInherit reset to 0.  insertS is not used.
+ */
+void outputBundle(FILE *fp, int insertS)
+{
+    unsigned int ctg;
+
+    for(ctg = 1; ctg <= num_ctg; ctg++)
+    {
+        CONNECT *link = contig_array[ctg].downwardConnect;
+        unsigned int twinOfCtg = getTwinCtg(ctg);
+
+        for(; link; link = link->next)
+        {
+            unsigned int twinOfTarget;
+            CONNECT *mirror;
+
+            if(link->weightNotInherit <= bund_threshold)
+                continue;
+
+            if(link->gapLen < 0)
+                fprintf(fp, "%d\t%d\t%d\n", ctg, link->contigID, link->gapLen);
+
+            link->weightNotInherit = 0;
+
+            twinOfTarget = getTwinCtg(link->contigID);
+            mirror = getCntBetween(twinOfTarget, twinOfCtg);
+
+            if(mirror)
+                mirror->weightNotInherit = 0;
+        }
+    }
+}
+
diff --git a/fusion/check.c b/fusion/check.c
new file mode 100644
index 0000000..8f897f4
--- /dev/null
+++ b/fusion/check.c
@@ -0,0 +1,69 @@
+/***************************************************************************
+ * Title: check.c
+ * Author: Haixu Tang
+ * Created: Jun. 2002
+ * Last modified: May. 2004
+ *
+ * Copyright (c) 2001-2004 The Regents of the University of California
+ * All Rights Reserved
+ * See file LICENSE for details.
+ ***************************************************************************/
+
+/* ckopen - open file; check for success */
+
+#include <stdinc.h>
+//#include <extfunc.h>
+
+void *ckalloc(unsigned long long amount);
+FILE *ckopen(char *name, char *mode);
+
+/* Open name with the given fopen() mode; on failure print a message to
+ * stdout and terminate the process with exit(-1). */
+FILE *ckopen(char *name, char *mode)
+{
+    FILE *stream = fopen(name, mode);
+
+    if (stream == NULL)
+    {
+        printf("Cannot open file %s.\n", name);
+        exit(-1);
+    }
+
+    return stream;
+}
+
+
+/* ckalloc - allocate space; check for success */
+
+/*
+ * Allocate amount zero-filled bytes.  Prints a message and exits with -1
+ * when the memory cannot be obtained.  A request that does not fit in
+ * size_t is treated as an allocation failure instead of being silently
+ * truncated by the conversion to calloc()'s parameter type.
+ */
+void *ckalloc(unsigned long long amount)
+{
+    void *p = NULL;
+    int failed = 0;
+
+    if (amount > (size_t) -1)
+        failed = 1;                     /* size_t is narrower than the request */
+    else if ((p = calloc(1, (size_t) amount)) == NULL && amount != 0)
+        failed = 1;
+
+    if (failed)
+    {
+        printf("not enought memory");
+        fflush(stdout);
+        exit(-1);
+    }
+
+    return(p);
+}
+
+
+/* reallocate memory */
+/*
+ * Resize p to new_size bytes.  When realloc() fails, fall back to a fresh
+ * ckalloc() block (which exits on failure) and copy the old contents by
+ * hand.  old_size is the current size of p in bytes.
+ */
+void *ckrealloc(void *p, size_t new_size, size_t old_size)
+{
+    void *q;
+
+    q = realloc((void *) p, new_size);
+
+    if (new_size == 0 || q != (void *) 0)
+        return q;
+
+    /* manually reallocate space */
+    q = ckalloc(new_size);
+
+    if (p != NULL)
+    {
+        /* never copy more than the new block can hold (shrinking case) */
+        bcopy(p, q, old_size < new_size ? old_size : new_size);
+        free(p);
+    }
+
+    return q;
+}
diff --git a/fusion/connect.c b/fusion/connect.c
new file mode 100644
index 0000000..3840f8d
--- /dev/null
+++ b/fusion/connect.c
@@ -0,0 +1,194 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+#define CNBLOCKSIZE 100000
+
+/* Create the global CONNECT memory manager unless one already exists. */
+void createCntMemManager()
+{
+    if(cn_mem_manager)
+        return;
+
+    cn_mem_manager = createMem_manager(CNBLOCKSIZE, sizeof(CONNECT));
+}
+
+void destroyConnectMem() /* hand the global CONNECT memory manager to freeMem_manager() */
+{
+ freeMem_manager(cn_mem_manager);
+ cn_mem_manager = NULL; /* lets createCntMemManager() build a new manager later */
+}
+
+/*
+ * Take a CONNECT item from cn_mem_manager and initialise it as a
+ * connection to contigId with gap length gap.  weight starts at 1; every
+ * other field assigned here starts at 0 / NULL.
+ */
+CONNECT *allocateCN(unsigned int contigId, int gap)
+{
+    CONNECT *cn = (CONNECT *)getItem(cn_mem_manager);
+
+    /* values supplied by the caller */
+    cn->contigID = contigId;
+    cn->gapLen = gap;
+
+    /* weights */
+    cn->weight = 1;
+    cn->weightNotInherit = 0;
+
+    /* gap bounds and flags */
+    cn->minGap = 0;
+    cn->maxGap = 0;
+    cn->bySmall = 0;
+    cn->weakPoint = 0;
+    cn->mask = 0;
+    cn->used = 0;
+    cn->checking = 0;
+    cn->deleted = 0;
+    cn->inherit = 0;
+
+    /* scaffold bookkeeping */
+    cn->prevInScaf = 0;
+    cn->singleInScaf = 0;
+    cn->nextInScaf = NULL;
+
+    /* no PE array yet */
+    cn->PE = NULL;
+
+    return cn;
+}
+
+/*
+ * output_cntGVZ - write every non-deleted connection to
+ * "<outfile>.scaffold.gvz" as a digraph; masked connections are drawn in
+ * red.  Each edge label is "gapLen(flag_weight)", where flag is 1 when
+ * the connection has a prevInScaf or nextInScaf link.
+ */
+void output_cntGVZ(char *outfile)
+{
+    char name[256];
+    FILE *fp;
+    unsigned int i;
+    CONNECT *connect;
+    boolean flag;
+    int written;
+
+    /* bounded formatting: the old sprintf() could overrun name[] */
+    written = snprintf(name, sizeof(name), "%s.scaffold.gvz", outfile);
+
+    if(written < 0 || (size_t)written >= sizeof(name))
+    {
+        printf("output_cntGVZ: file name too long, nothing written\n");
+        return;
+    }
+
+    fp = ckopen(name, "w");
+    fprintf(fp, "digraph G{\n");
+    fprintf(fp, "\tsize=\"512,512\";\n");
+
+    for(i = num_ctg; i > 0; i--)
+    {
+        for(connect = contig_array[i].downwardConnect; connect; connect = connect->next)
+        {
+            if(connect->deleted)
+                continue;
+
+            flag = (connect->prevInScaf || connect->nextInScaf) ? 1 : 0;
+
+            /* one format for both cases; only the colour suffix differs */
+            fprintf(fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"%s];\n"
+                    , i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+                    connect->gapLen, flag, connect->weight,
+                    connect->mask ? ", color = red" : "");
+        }
+    }
+
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+
+/***************** below this line all codes are about lookup table *****************/
+
+/* Allocate the global lookup table (3 * num_ctg + 1 zeroed slots) unless
+ * it already exists. */
+void createCntLookupTable()
+{
+    if(cntLookupTable)
+        return;
+
+    cntLookupTable = (CONNECT **)ckalloc((3 * num_ctg + 1) * sizeof(CONNECT *));
+}
+
+/* Release the global lookup table and clear its pointer. */
+void deleteCntLookupTable()
+{
+    /* free(NULL) is a no-op, so no guard is needed */
+    free((void *)cntLookupTable);
+    cntLookupTable = NULL;
+}
+
+/* Push cnt onto the front of the lookup-table chain at slot
+ * 2 * from_c + cnt->contigID.  Does nothing without a table or a cnt. */
+void putCnt2LookupTable(unsigned int from_c, CONNECT *cnt)
+{
+    unsigned int slot;
+
+    if(cnt == NULL || cntLookupTable == NULL)
+        return;
+
+    slot = 2 * from_c + cnt->contigID;
+    cnt->nextInLookupTable = cntLookupTable[slot];
+    cntLookupTable[slot] = cnt;
+}
+
+/* Scan the chain at slot 2 * from_c + to_c for a connection whose
+ * contigID equals to_c; NULL when there is none. */
+static CONNECT *getCntInLookupTable(unsigned int from_c, unsigned int to_c)
+{
+    unsigned int slot = 2 * from_c + to_c;
+    CONNECT *candidate;
+
+    for(candidate = cntLookupTable[slot]; candidate; candidate = candidate->nextInLookupTable)
+    {
+        if(candidate->contigID == to_c)
+            return candidate;
+    }
+
+    return NULL;
+}
+
+/*
+ * Return the connection from from_c to to_c, or NULL.  Uses the lookup
+ * table when it exists, otherwise walks from_c's downwardConnect list.
+ */
+CONNECT *getCntBetween(unsigned int from_c, unsigned int to_c)
+{
+    CONNECT *scan;
+
+    if(cntLookupTable)
+        return getCntInLookupTable(from_c, to_c);
+
+    for(scan = contig_array[from_c].downwardConnect; scan; scan = scan->next)
+    {
+        if(scan->contigID == to_c)
+            return scan;
+    }
+
+    return NULL;
+}
+/*
+void removeCntInLookupTable(unsigned int from_c,unsigned int to_c)
+{
+ unsigned int index = 2*from_c + to_c;
+ CONNECT *ite_cnt = cntLookupTable[index];
+ CONNECT *cnt;
+
+ if(!ite_cnt){
+ printf("removeCntInLookupTable: not found A\n");
+ return;
+ }
+ if(ite_cnt->contigID==to_c){
+ cntLookupTable[index] = ite_cnt->nextInLookupTable;
+ return;
+ }
+
+ while(ite_cnt->nextInLookupTable&&ite_cnt->nextInLookupTable->contigID!=to_c)
+ ite_cnt = ite_cnt->nextInLookupTable;
+
+ if(ite_cnt->nextInLookupTable){
+ cnt = ite_cnt->nextInLookupTable;
+ ite_cnt->nextInLookupTable = cnt->nextInLookupTable;
+ return;
+ }
+ printf("removeCntInLookupTable: not found B\n");
+ return;
+}
+*/
diff --git a/fusion/darray.c b/fusion/darray.c
new file mode 100644
index 0000000..7bc1ee9
--- /dev/null
+++ b/fusion/darray.c
@@ -0,0 +1,60 @@
+#include "darray.h"
+#include "check.h"
+
+/*
+ * Create a dynamic array with room for num_items items of unit_size
+ * bytes each.  Both the descriptor and the storage come from ckalloc(),
+ * which exits on allocation failure, so the result is never NULL (the
+ * descriptor used to come from an unchecked malloc()).
+ */
+DARRAY *createDarray(int num_items, size_t unit_size)
+{
+    DARRAY *newDarray = (DARRAY *)ckalloc(1 * sizeof(DARRAY));
+
+    newDarray->array_size = num_items;
+    newDarray->item_size = unit_size;
+    newDarray->item_c = 0;
+    newDarray->array = (void *)ckalloc(num_items * unit_size);
+    return newDarray;
+}
+
+/*
+ * Return the address of slot index, growing the storage when needed, and
+ * raise item_c to index + 1 if it is smaller.  Returns NULL for a
+ * negative index.
+ *
+ * Growth picks the smallest multiple (at least 2x) of the current
+ * capacity that contains index.  The old test "index > i * array_size"
+ * stopped one step early when index was exactly i * array_size and
+ * returned an address one item past the end of the new storage.
+ * NOTE(review): storage added by a successful realloc() is not zeroed.
+ */
+void *darrayPut(DARRAY *darray, long long index)
+{
+    long long base;
+    long long factor = 2;
+
+    if(index < 0)
+        return NULL;
+
+    if(index + 1 > darray->item_c)
+        darray->item_c = index + 1;
+
+    if(index < darray->array_size)
+        return (void *)((char *)darray->array + darray->item_size * index);
+
+    /* a zero capacity would never grow by multiplication */
+    base = darray->array_size > 0 ? darray->array_size : 1;
+
+    while(index >= factor * base)
+        factor++;
+
+    darray->array = (void *)ckrealloc(darray->array, factor * base * darray->item_size
+                                      , darray->array_size * darray->item_size);
+    darray->array_size = factor * base;
+    return (void *)((char *)darray->array + darray->item_size * index);
+}
+
+/* Return the address of slot index, or print a diagnostic and return
+ * NULL when index is not below the current capacity. */
+void *darrayGet(DARRAY *darray, long long index)
+{
+    if(index >= darray->array_size)
+    {
+        printf("array read index %lld out of range %lld\n", index, darray->array_size);
+        return NULL;
+    }
+
+    return (void *)((char *)darray->array + darray->item_size * index);
+}
+
+
+void emptyDarray(DARRAY *darray) /* reset the item counter; storage and capacity are left untouched */
+{
+ darray->item_c = 0;
+}
+
+/* Release the storage and the descriptor; a NULL darray is ignored. */
+void freeDarray(DARRAY *darray)
+{
+    if(darray == NULL)
+        return;
+
+    /* free(NULL) is a no-op, so array needs no separate check */
+    free((void *)darray->array);
+    free((void *)darray);
+}
+
diff --git a/fusion/fib.c b/fusion/fib.c
new file mode 100644
index 0000000..4d10a57
--- /dev/null
+++ b/fusion/fib.c
@@ -0,0 +1,691 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+/*-
+ * Copyright 1997-2003 John-Mark Gurney.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: fib.c,v 1.10 2007/10/19 13:09:26 zerbino Exp $
+ *
+ */
+#include <limits.h>
+#include <stdlib.h>
+#include "fib.h"
+#include "fibpriv.h"
+#include "extfunc2.h"
+
+#define HEAPBLOCKSIZE 10000
+
+static int fh_comparedata(FibHeap *h, Coordinate key, unsigned int data, FibHeapNode *b);
+unsigned int fh_replacekeydata(FibHeap *h, FibHeapNode *x, Coordinate key, unsigned int data);
+
+/* Fetch one node from the heap's node memory manager.  (The stray ';'
+ * that followed the function body has been removed.) */
+static FibHeapNode *allocateFibHeapEl(FibHeap *heap)
+{
+    return (FibHeapNode *)getItem(heap->nodeMemory);
+}
+
+static void deallocateFibHeapEl(FibHeapNode *a, FibHeap *heap) /* give node a back to the heap's node memory manager */
+{
+ returnItem(heap->nodeMemory, a);
+}
+
+/*
+ * Exchange two lvalues a and b of the given type.
+ * The temporary has a name that cannot collide with an argument (the old
+ * temporary "c" broke swap(int, c, d)), the arguments are parenthesized,
+ * and the macro no longer ends in a dangling line continuation.
+ */
+#define swap(type, a, b)            \
+    do {                            \
+        type swap_tmp_ = (a);       \
+        (a) = (b);                  \
+        (b) = swap_tmp_;            \
+    } while (0)
+
+/* number of bits in an IDnum, assuming 8-bit bytes */
+#define INT_BITS (sizeof(IDnum) * 8)
+
+/*
+ * Ceiling of log2(a).  The loop halves the shift width each round and
+ * builds the bit position of a's highest set bit in exponent; one is
+ * added unless a is exactly 1 << exponent.  For a == 0 the result is 1.
+ */
+static inline IDnum ceillog2(IDnum a)
+{
+    const IDnum original = a;
+    IDnum exponent = 0;
+    IDnum half;
+
+    for (half = INT_BITS / 2; half; half /= 2)
+    {
+        const IDnum threshold = ((IDnum) 1) << half;
+
+        exponent = (exponent << 1);
+
+        if (a >= threshold)
+        {
+            a /= threshold;
+            exponent = exponent | 1;
+        }
+        else
+        {
+            a &= threshold - 1;
+        }
+    }
+
+    if ((((IDnum) 1 << exponent)) != original)
+        return exponent + 1;
+
+    return exponent;
+}
+
+/*
+ * Private Heap Functions
+ */
+/*
+ * Put a freshly allocated heap into its empty state and create its node
+ * memory manager.
+ *
+ * The createMem_manager() arguments were swapped relative to the call in
+ * createCntMemManager() (connect.c), which passes (items per block, item
+ * size).  With the old order every node slot was HEAPBLOCKSIZE bytes.
+ * NOTE(review): confirm against createMem_manager()'s prototype.
+ */
+static void fh_initheap(FibHeap *new)
+{
+    new->fh_cmp_fnct = NULL;
+    new->nodeMemory = createMem_manager(HEAPBLOCKSIZE, sizeof(FibHeapNode));
+    new->fh_neginf = 0;
+    new->fh_n = 0;
+    new->fh_Dl = -1;        /* no consolidation array allocated yet */
+    new->fh_cons = NULL;
+    new->fh_min = NULL;
+    new->fh_root = NULL;
+    new->fh_keys = 0;
+}
+
+/* Free the consolidation array and the heap descriptor.  The node
+ * memory manager is not touched here. */
+static void fh_destroyheap(FibHeap *h)
+{
+    /* free(NULL) is a no-op */
+    free(h->fh_cons);
+    h->fh_cons = NULL;
+
+    h->fh_cmp_fnct = NULL;
+    h->fh_neginf = 0;
+
+    free(h);
+}
+
+/*
+ * Public Heap Functions
+ */
+/* Allocate and initialise a heap with fh_keys set to 1; NULL when
+ * malloc() fails. */
+FibHeap *fh_makekeyheap()
+{
+    FibHeap *heap = malloc(sizeof * heap);
+
+    if (heap != NULL)
+    {
+        fh_initheap(heap);
+        heap->fh_keys = 1;
+    }
+
+    return heap;
+}
+
+/* Allocate and initialise a heap (fh_keys stays 0); NULL when malloc()
+ * fails. */
+FibHeap *fh_makeheap()
+{
+    FibHeap *heap = malloc(sizeof * heap);
+
+    if (heap != NULL)
+        fh_initheap(heap);
+
+    return heap;
+}
+
+/* Install fnct as the heap's comparison function and return the one it
+ * replaces. */
+voidcmp fh_setcmp(FibHeap *h, voidcmp fnct)
+{
+    voidcmp previous = h->fh_cmp_fnct;
+
+    h->fh_cmp_fnct = fnct;
+    return previous;
+}
+
+/* Store data as the heap's fh_neginf value and return the previous one. */
+unsigned int fh_setneginf(FibHeap *h, unsigned int data)
+{
+    unsigned int previous = h->fh_neginf;
+
+    h->fh_neginf = data;
+    return previous;
+}
+
+/*
+ * Merge hb into ha and return the surviving heap; the other descriptor
+ * is destroyed.
+ *
+ * When one side has an empty root list it is discarded.  Its node memory
+ * manager is now released too (fh_destroyheap() alone leaked it).  When
+ * both sides hold nodes, hb's manager has to stay alive because its
+ * nodes are spliced into ha.
+ * NOTE(review): nothing frees hb's manager afterwards in that case.
+ */
+FibHeap *fh_union(FibHeap *ha, FibHeap *hb)
+{
+    FibHeapNode *x;
+
+    if (ha->fh_root == NULL || hb->fh_root == NULL)
+    {
+        /* either one or both are empty */
+        FibHeap *discarded = (ha->fh_root == NULL) ? ha : hb;
+        FibHeap *kept = (ha->fh_root == NULL) ? hb : ha;
+
+        freeMem_manager(discarded->nodeMemory);
+        discarded->nodeMemory = NULL;
+        fh_destroyheap(discarded);
+        return kept;
+    }
+
+    /* splice the two circular root lists together */
+    ha->fh_root->fhe_left->fhe_right = hb->fh_root;
+    hb->fh_root->fhe_left->fhe_right = ha->fh_root;
+    x = ha->fh_root->fhe_left;
+    ha->fh_root->fhe_left = hb->fh_root->fhe_left;
+    hb->fh_root->fhe_left = x;
+    ha->fh_n += hb->fh_n;
+
+    /* set fh_min if necessary */
+    if (fh_compare(ha, hb->fh_min, ha->fh_min) < 0)
+        ha->fh_min = hb->fh_min;
+
+    fh_destroyheap(hb);
+    return ha;
+}
+
+void fh_deleteheap(FibHeap *h) /* release the node memory manager, then the heap itself */
+{
+ freeMem_manager(h->nodeMemory);
+ h->nodeMemory = NULL;
+ fh_destroyheap(h); /* frees fh_cons and h */
+}
+
+/*
+ * Public Key Heap Functions
+ */
+/* Insert data under key and return the new node, or NULL when no node
+ * could be allocated. */
+FibHeapNode *fh_insertkey(FibHeap *h, Coordinate key, unsigned int data)
+{
+    FibHeapNode *node = fhe_newelem(h);
+
+    if (node == NULL)
+        return NULL;
+
+    node->fhe_key = key;
+    node->fhe_data = data;
+
+    /* goes onto the root list; fh_insertel() updates fh_min if needed */
+    fh_insertel(h, node);
+
+    return node;
+}
+
+/* 1 when the heap has no minimum node, 0 otherwise. */
+boolean fh_isempty(FibHeap *h)
+{
+    return (h->fh_min == NULL) ? 1 : 0;
+}
+
+/* Key of the minimum node, or INT_MIN when the heap has none. */
+Coordinate fh_minkey(FibHeap *h)
+{
+    if (h->fh_min != NULL)
+        return h->fh_min->fhe_key;
+
+    return INT_MIN;
+}
+
+
+/*
+ * Replace the key and data of node x and return the previous data.
+ * Only a non-increasing change is supported: the call aborts when the
+ * new (key, data) compares greater than x.
+ */
+unsigned int fh_replacekeydata(FibHeap *h, FibHeapNode *x,
+                               Coordinate key, unsigned int data)
+{
+    const unsigned int prevData = x->fhe_data;
+    const Coordinate prevKey = x->fhe_key;
+    const int order = fh_comparedata(h, key, data, x);
+    FibHeapNode *parent;
+
+    /* increasing a key would need delete + reinsert; not implemented */
+    if (order > 0)
+    {
+        /* XXX - bad code! */
+        abort();
+    }
+
+    x->fhe_data = data;
+    x->fhe_key = key;
+
+    /* compares equal: the node keeps its place */
+    if (order == 0)
+        return prevData;
+
+    parent = x->fhe_p;
+
+    if (h->fh_keys && prevKey == key)
+        return prevData;
+
+    if (parent != NULL && fh_compare(h, x, parent) <= 0)
+    {
+        fh_cut(h, x, parent);
+        fh_cascading_cut(h, parent);
+    }
+
+    /* "<=" so that the call made from fh_delete selects x itself */
+    if (fh_compare(h, x, h->fh_min) <= 0)
+        h->fh_min = x;
+
+    return prevData;
+}
+
+/* Give node x a new key (its data is kept) and return the old key. */
+Coordinate fh_replacekey(FibHeap *h, FibHeapNode *x, Coordinate key)
+{
+    const Coordinate previous = x->fhe_key;
+
+    (void) fh_replacekeydata(h, x, key, x->fhe_data);
+
+    return previous;
+}
+
+/*
+ * Public void * Heap Functions
+ */
+/*
+ * this will return these values:
+ * NULL failed for some reason
+ * ptr token to use for manipulation of data
+ */
+/* Insert data without setting a key and return the new node, or NULL
+ * when no node could be allocated. */
+FibHeapNode *fh_insert(FibHeap *h, unsigned int data)
+{
+    FibHeapNode *node = fhe_newelem(h);
+
+    if (node == NULL)
+        return NULL;
+
+    node->fhe_data = data;
+
+    /* goes onto the root list; fh_insertel() updates fh_min if needed */
+    fh_insertel(h, node);
+
+    return node;
+}
+
+/* Data of the minimum node, or 0 when the heap has none. */
+unsigned int fh_min(FibHeap *h)
+{
+    if (h->fh_min != NULL)
+        return h->fh_min->fhe_data;
+
+    return 0;
+}
+
+/* Remove the minimum node and return its data; returns 0 when the heap
+ * has no minimum.  The node is given back to the node memory manager
+ * unless NO_FREE is defined. */
+unsigned int fh_extractmin(FibHeap *h)
+{
+    FibHeapNode *top;
+    unsigned int data;
+
+    if (h->fh_min == NULL)
+        return 0;
+
+    top = fh_extractminel(h);
+    data = top->fhe_data;
+#ifndef NO_FREE
+    deallocateFibHeapEl(top, h);
+#endif
+
+    return data;
+}
+
+/* Overwrite the data stored in x and return what was there before. */
+unsigned int fh_replacedata(FibHeapNode *x, unsigned int data)
+{
+    const unsigned int previous = x->fhe_data;
+
+    x->fhe_data = data;
+    return previous;
+}
+
+/*
+ * Delete node x and return the data it held.  In a key heap the key is
+ * lowered to INT_MIN first, otherwise the data is replaced by fh_neginf;
+ * the minimum is then extracted.
+ */
+unsigned int fh_delete(FibHeap *h, FibHeapNode *x)
+{
+    const unsigned int removed = x->fhe_data;
+
+    if (h->fh_keys)
+        fh_replacekey(h, x, INT_MIN);
+    else
+        fh_replacedata(x, h->fh_neginf);
+
+    fh_extractmin(h);
+
+    return removed;
+}
+
+/*
+ * beginning of private element functions
+ */
+/*
+ * Detach the current minimum node and return it.  Its children are moved
+ * onto the root list first; afterwards the heap is consolidated unless
+ * it became empty.  The caller must make sure fh_min is not NULL.
+ */
+static FibHeapNode *fh_extractminel(FibHeap *h)
+{
+    FibHeapNode *victim = h->fh_min;
+    FibHeapNode *first = NULL;
+    FibHeapNode *child = victim->fhe_child;
+    FibHeapNode *next;
+
+    /* put all the children on the root list */
+    /* for true consistency, we should use fhe_remove */
+    while (child != first && child != NULL)
+    {
+        if (first == NULL)
+            first = child;
+
+        next = child->fhe_right;
+        child->fhe_p = NULL;
+        fh_insertrootlist(h, child);
+        child = next;
+    }
+
+    /* remove minimum from root list */
+    fh_removerootlist(h, victim);
+    h->fh_n--;
+
+    /* if we aren't empty, consolidate the heap */
+    if (h->fh_n != 0)
+    {
+        h->fh_min = victim->fhe_right;
+        fh_consolidate(h);
+    }
+    else
+    {
+        h->fh_min = NULL;
+    }
+
+    return victim;
+}
+
+/* Add x to the root list; x becomes a one-element list when the root
+ * list is empty. */
+static void fh_insertrootlist(FibHeap *h, FibHeapNode *x)
+{
+    if (h->fh_root != NULL)
+    {
+        fhe_insertafter(h->fh_root, x);
+        return;
+    }
+
+    h->fh_root = x;
+    x->fhe_left = x;
+    x->fhe_right = x;
+}
+
+/* Take x off the root list.  fh_root becomes NULL when x was the only
+ * entry, otherwise whatever fhe_remove() returns. */
+static void fh_removerootlist(FibHeap *h, FibHeapNode *x)
+{
+    h->fh_root = (x->fhe_left == x) ? NULL : fhe_remove(x);
+}
+
+/*
+ * Link root trees of equal degree until every degree occurs at most
+ * once, then rebuild the root list from the fh_cons slots and recompute
+ * fh_min.
+ */
+static void fh_consolidate(FibHeap *h)
+{
+    FibHeapNode **slots;
+    FibHeapNode *tree;
+    FibHeapNode *other;
+    IDnum idx;
+    IDnum degree;
+    IDnum nslots;
+
+    fh_checkcons(h);
+
+    /* work on h->fh_cons, indexed by degree */
+    nslots = h->fh_Dl + 1;
+    slots = h->fh_cons;
+
+    for (idx = 0; idx < nslots; idx++)
+        slots[idx] = NULL;
+
+    while ((tree = h->fh_root) != NULL)
+    {
+        fh_removerootlist(h, tree);
+        degree = tree->fhe_degree;
+
+        /* XXX - assert that degree < nslots */
+        for (; slots[degree] != NULL; degree++)
+        {
+            other = slots[degree];
+
+            /* keep the smaller of the two as the parent */
+            if (fh_compare(h, tree, other) > 0)
+                swap(FibHeapNode *, tree, other);
+
+            fh_heaplink(h, other, tree);
+            slots[degree] = NULL;
+        }
+
+        slots[degree] = tree;
+    }
+
+    h->fh_min = NULL;
+
+    for (idx = 0; idx < nslots; idx++)
+    {
+        if (slots[idx] == NULL)
+            continue;
+
+        fh_insertrootlist(h, slots[idx]);
+
+        if (h->fh_min == NULL
+                || fh_compare(h, slots[idx], h->fh_min) < 0)
+            h->fh_min = slots[idx];
+    }
+}
+
+/* Make y a child of x: add it to x's child list, bump x's degree and
+ * clear y's mark. */
+static void fh_heaplink(FibHeap *h, FibHeapNode *y, FibHeapNode *x)
+{
+    if (x->fhe_child != NULL)
+        fhe_insertbefore(x->fhe_child, y);
+    else
+        x->fhe_child = y;
+
+    y->fhe_p = x;
+    y->fhe_mark = 0;
+    x->fhe_degree++;
+}
+
+/* Detach x from its parent y and put it on the root list, unmarked. */
+static void fh_cut(FibHeap *h, FibHeapNode *x, FibHeapNode *y)
+{
+    fhe_remove(x);
+    y->fhe_degree--;
+
+    fh_insertrootlist(h, x);
+
+    x->fhe_mark = 0;
+    x->fhe_p = NULL;
+}
+
+/*
+ * Walk from y towards the root: an unmarked node with a parent is marked
+ * and the walk stops; a marked one is cut from its parent and the walk
+ * continues with that parent.
+ */
+static void fh_cascading_cut(FibHeap *h, FibHeapNode *y)
+{
+    FibHeapNode *z;
+
+    for (z = y->fhe_p; z != NULL; z = y->fhe_p)
+    {
+        if (y->fhe_mark == 0)
+        {
+            y->fhe_mark = 1;
+            return;
+        }
+
+        fh_cut(h, y, z);
+        y = z;
+    }
+}
+
+/*
+ * beginning of handling elements of fibheap
+ */
+/* Allocate a node from the heap's pool and initialise it; NULL when the
+ * allocation fails. */
+static FibHeapNode *fhe_newelem(FibHeap *h)
+{
+    FibHeapNode *fresh = allocateFibHeapEl(h);
+
+    if (fresh != NULL)
+        fhe_initelem(fresh);
+
+    return fresh;
+}
+
+/* Reset e to a detached node: no parent, no child, degree 0, unmarked,
+ * data 0, linked to itself.  The key is not touched. */
+static void fhe_initelem(FibHeapNode *e)
+{
+    /* a one-element circular list */
+    e->fhe_left = e;
+    e->fhe_right = e;
+
+    e->fhe_p = NULL;
+    e->fhe_child = NULL;
+
+    e->fhe_degree = 0;
+    e->fhe_mark = 0;
+    e->fhe_data = 0;
+}
+
+/*
+ * Link b into the circular list directly to the right of a.
+ * The same four assignments cover the one-element list as well: with
+ * a->fhe_right == a they set a's left and right to b and b's left and
+ * right to a, which is what the separate special case did.
+ */
+static void fhe_insertafter(FibHeapNode *a, FibHeapNode *b)
+{
+    b->fhe_right = a->fhe_right;
+    a->fhe_right->fhe_left = b;
+    a->fhe_right = b;
+    b->fhe_left = a;
+}
+
+static inline void fhe_insertbefore(FibHeapNode *a, FibHeapNode *b) /* link b directly to the left of a */
+{
+ fhe_insertafter(a->fhe_left, b); /* "before a" is "after a's left neighbour" */
+}
+
+/*
+ * Unlink x from its circular list and return its former left neighbour
+ * (NULL when x was alone).  If x was its parent's child pointer, the
+ * parent is redirected to that neighbour.  x ends up detached and linked
+ * to itself.
+ */
+static FibHeapNode *fhe_remove(FibHeapNode *x)
+{
+    FibHeapNode *neighbour = (x == x->fhe_left) ? NULL : x->fhe_left;
+
+    /* fix the parent pointer */
+    if (x->fhe_p != NULL && x->fhe_p->fhe_child == x)
+        x->fhe_p->fhe_child = neighbour;
+
+    x->fhe_right->fhe_left = x->fhe_left;
+    x->fhe_left->fhe_right = x->fhe_right;
+
+    /* clear out hanging pointers */
+    x->fhe_p = NULL;
+    x->fhe_left = x;
+    x->fhe_right = x;
+
+    return neighbour;
+}
+
+/*
+ * Make sure fh_cons has fh_Dl + 1 slots for the current node count.
+ * fh_Dl is set to ceillog2(fh_n) + 1 but never below 8; the process
+ * aborts when the array cannot be obtained.
+ */
+static void fh_checkcons(FibHeap *h)
+{
+    IDnum oDl;
+
+    /* already large enough to "reorganize" */
+    if (h->fh_Dl != -1 && h->fh_n <= (1 << h->fh_Dl))
+        return;
+
+    oDl = h->fh_Dl;
+
+    h->fh_Dl = ceillog2(h->fh_n) + 1;
+
+    if (h->fh_Dl < 8)
+        h->fh_Dl = 8;
+
+    if (oDl != h->fh_Dl)
+        h->fh_cons =
+            (FibHeapNode **) realloc(h->fh_cons,
+                                     sizeof * h->fh_cons * (h->fh_Dl + 1));
+
+    if (h->fh_cons == NULL)
+        abort();
+}
+
+/* Three-way comparison of two nodes by key: -1, 0 or 1.  h is unused. */
+static int fh_compare(FibHeap *h, FibHeapNode *a, FibHeapNode *b)
+{
+    if (a->fhe_key < b->fhe_key)
+        return -1;
+
+    return (a->fhe_key == b->fhe_key) ? 0 : 1;
+}
+
+/* Compare a (key, data) pair against node b by wrapping the pair in a
+ * temporary node and calling fh_compare(). */
+static int
+fh_comparedata(FibHeap *h, Coordinate key, unsigned int data, FibHeapNode *b)
+{
+    FibHeapNode probe;
+
+    probe.fhe_key = key;
+    probe.fhe_data = data;
+
+    return fh_compare(h, &probe, b);
+}
+
+/*
+ * Put x on the root list, make it the minimum when the heap had none or
+ * when x orders before the current minimum (by key in a key heap,
+ * otherwise through fh_cmp_fnct), and count it.
+ */
+static void fh_insertel(FibHeap *h, FibHeapNode *x)
+{
+    int becomesMin;
+
+    fh_insertrootlist(h, x);
+
+    if (h->fh_min == NULL)
+        becomesMin = 1;
+    else if (h->fh_keys)
+        becomesMin = x->fhe_key < h->fh_min->fhe_key;
+    else
+        becomesMin = h->fh_cmp_fnct(x->fhe_data, h->fh_min->fhe_data) < 0;
+
+    if (becomesMin)
+        h->fh_min = x;
+
+    h->fh_n++;
+}
diff --git a/fusion/fibHeap.c b/fusion/fibHeap.c
new file mode 100644
index 0000000..4623d05
--- /dev/null
+++ b/fusion/fibHeap.c
@@ -0,0 +1,77 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+#include "fib.h"
+
+// Constructor
+// Memory allocated
+FibHeap *newFibHeap() /* returns whatever fh_makekeyheap() returns */
+{
+ return fh_makekeyheap();
+}
+
+// Add new node into heap with a key, and a pointer to the specified node
+FibHeapNode *insertNodeIntoHeap(FibHeap *heap, Coordinate key,
+ unsigned int node) /* node is the value stored as the heap node's data */
+{
+ return fh_insertkey(heap, key, node);
+}
+
+// Returns smallest key in heap
+Coordinate minKeyOfHeap(FibHeap *heap) /* forwards to fh_minkey() */
+{
+ return fh_minkey(heap);
+}
+
+// Replaces the key for a given node
+Coordinate replaceKeyInHeap(FibHeap *heap, FibHeapNode *node,
+ Coordinate newKey) /* forwards to fh_replacekey() and returns its result */
+{
+ return fh_replacekey(heap, node, newKey);
+}
+
+// Removes the node with the shortest key, then returns it.
+unsigned int removeNextNodeFromHeap(FibHeap *heap) /* returns the result of fh_extractmin() */
+{
+ return (unsigned int) fh_extractmin(heap);
+}
+
+boolean IsHeapEmpty(FibHeap *heap) /* forwards to fh_isempty() */
+{
+ return fh_isempty(heap);
+}
+
+// Destructor
+void destroyHeap(FibHeap *heap) /* forwards to fh_deleteheap(); heap must not be used afterwards */
+{
+ fh_deleteheap(heap);
+}
+
+// Replace the node pointed to by a heap node
+void replaceValueInHeap(FibHeapNode *node, unsigned int newValue) /* the previous value returned by fh_replacedata() is dropped */
+{
+ fh_replacedata(node, newValue);
+}
+
+// Remove unwanted node
+void destroyNodeInHeap(FibHeapNode *node, FibHeap *heap) /* note the argument order: fh_delete() takes the heap first */
+{
+ fh_delete(heap, node);
+}
diff --git a/fusion/finalFusion b/fusion/finalFusion
new file mode 100755
index 0000000..c5c46b8
Binary files /dev/null and b/fusion/finalFusion differ
diff --git a/fusion/hashFunction.c b/fusion/hashFunction.c
new file mode 100644
index 0000000..c3aa270
--- /dev/null
+++ b/fusion/hashFunction.c
@@ -0,0 +1,86 @@
+#include <stdinc.h>
+
+
+#define KMER_HASH_MASK 0x0000000000ffffffL
+#define KMER_HASH_BUCKETS 16777216 // 4^12
+
+static int crc_table[256] = /* 256-entry lookup table indexed by crc32() in this file.
+ * NOTE(review): entries above 0x7fffffff do not fit in a signed int, so their stored value relies on implementation-defined conversion. */
+{
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+ 0x2d02ef8d
+};
+
+/*
+ * Table-driven checksum over len bytes of buf, starting from crc.
+ * Returns 0 for a NULL buffer.  All arithmetic is done on signed int,
+ * exactly as before, so the shift keeps its implementation-defined
+ * behaviour for negative values.
+ */
+static int crc32(int crc, const char *buf, int len)
+{
+    int remaining;
+
+    if (buf == NULL)
+        return 0;
+
+    crc ^= 0xffffffff;
+
+    for (remaining = len; remaining != 0; remaining--, buf++)
+    {
+        crc = crc_table[((int) crc ^ (*buf)) & 0xff] ^ (crc >> 8);
+    }
+
+    return crc ^ 0xffffffff;
+}
+
+/* Hash a k-mer: checksum its raw bytes with crc32() and keep the bits
+ * selected by KMER_HASH_MASK. */
+Kmer hash_kmer(Kmer kmer)
+{
+    Kmer hash = crc32(0, (char *) &kmer, sizeof(Kmer));
+
+    hash &= KMER_HASH_MASK;
+    return hash;
+}
diff --git a/fusion/inc/check.h b/fusion/inc/check.h
new file mode 100755
index 0000000..db8f1ca
--- /dev/null
+++ b/fusion/inc/check.h
@@ -0,0 +1,5 @@
+/* NOTE(review): no include guard and no #include here; FILE and size_t must already be declared by the including file. */
+extern void *ckalloc(unsigned long long amount);
+extern void *ckrealloc(void *p, size_t new_size, size_t old_size);
+extern FILE *ckopen(char *name, char *mode);
+
diff --git a/fusion/inc/darray.h b/fusion/inc/darray.h
new file mode 100644
index 0000000..3cb52fb
--- /dev/null
+++ b/fusion/inc/darray.h
@@ -0,0 +1,23 @@
+#ifndef __DARRAY__
+#define __DARRAY__
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+typedef struct dynamic_array // array handle passed to darrayPut()/darrayGet()/freeDarray() declared below
+{
+ void *array; // untyped storage
+ long long array_size; // presumably the allocated capacity -- TODO confirm the unit (items vs bytes)
+ size_t item_size; // presumably the size of one item in bytes (createDarray takes a unit_size) -- TODO confirm
+ long long item_c; // presumably the number of items in use -- TODO confirm
+} DARRAY;
+
+void *darrayPut(DARRAY *darray, long long index);
+void *darrayGet(DARRAY *darray, long long index);
+DARRAY *createDarray(int num_items, size_t unit_size);
+void freeDarray(DARRAY *darray);
+void emptyDarray(DARRAY *darray);
+
+#endif
+
diff --git a/fusion/inc/def.h b/fusion/inc/def.h
new file mode 100644
index 0000000..9ed70e4
--- /dev/null
+++ b/fusion/inc/def.h
@@ -0,0 +1,299 @@
+/* this file provides some datatype definition */
+#ifndef _DEF
+#define _DEF
+
+#include "def2.h"
+#include "types.h"
+#include "stack.h"
+#include "darray.h"
+
+#define EDGE_BIT_SIZE 6
+#define word_len 12
+#define taskMask 0xf // 0xf keeps the low 4 bits (the earlier note said "7 bits", which does not match the value)
+
+#define MaxEdgeCov 16000
+/* 2-bit base codes: bits 1-2 of the ASCII letter give A/a=0, C/c=1, T/t=2, G/g=3 -- the same order as "ACTG" below. */
+#define base2int(base) (char)(((base)&0x06)>>1)
+#define int2base(seq) "ACTG"[seq]
+#define int2compbase(seq) "TGAC"[seq] // "TGAC"[i] is the complement of "ACTG"[i]
+#define int_comp(seq) (char)(seq^0x02) // complement code: 0<->2, 1<->3; same mapping as (char)((0x4E>>((seq)<<1))&0x03). NOTE(review): seq is not parenthesised.
+/* NOTE(review): the next line is a tentative definition inside a header, so every including .c file defines b_ban; this links only when common symbols are merged (e.g. -fcommon). */
+int b_ban;
+
+typedef unsigned long long Kmer;
+
+typedef struct edon // binary-tree node keyed by kmer (see the SplayEdonTree/searchEdonTree prototypes in extfunc.h)
+{
+ Kmer kmer;
+ unsigned int ctgLen: 1; // ctgLen + twin + pos total 32 bits
+ unsigned int twin: 1;
+ unsigned int pos: 30; // 30 bits: values up to 2^30 - 1
+ unsigned int ctgID; // presumably the contig this k-mer belongs to -- TODO confirm
+ struct edon *left; // child links
+ struct edon *right;
+} EDON;
+
+struct node_pt;
+
+typedef struct node // binary-tree node keyed by kmer (see the SplayNodeTree/searchNodeTree prototypes in extfunc.h)
+{
+ Kmer kmer;
+ unsigned char links; // presumably the link flags handled by link2next()/link2prev() -- TODO confirm
+ unsigned char linksB;
+ unsigned char cvg; // unsigned char: at most 255 with 8-bit chars
+ unsigned char linear: 1; // linear, deleted, mark: three 1-bit flags
+ unsigned char deleted: 1;
+ unsigned char mark: 1;
+ unsigned int to_end; // the edge no. it belongs to
+ struct node *left; // child links
+ struct node *right;
+} NODE;
+
+typedef struct node_pt // singly linked list element (next) that refers to a NODE
+{
+ NODE *node;
+ Kmer kmer;
+ boolean isSmaller; // NOTE(review): presumably whether kmer is the smaller of the k-mer and its reverse complement -- confirm
+ struct node_pt *next; // following list element
+} NODE_PT;
+
+typedef struct preedge // edge record whose two ends (from_node, to_node) are stored as Kmer values
+{
+ Kmer from_node;
+ Kmer to_node;
+ char *seq;
+ int length;
+ unsigned short cvg;
+ unsigned short bal_edge: 2; // indicates whether its bal_edge is the previous edge, the next edge or itself
+} preEDGE;
+
+typedef struct readinterval // record carrying two next/prev pointer pairs (OnEdge and InRead) plus a bal_rv link
+{
+ int readid;
+ unsigned int edgeid;
+ int start;
+ struct readinterval *bal_rv; // NOTE(review): presumably the matching interval on the reverse-complement edge -- confirm
+ struct readinterval *nextOnEdge; // next/prev pair: by name, the chain along the edge
+ struct readinterval *prevOnEdge;
+ struct readinterval *nextInRead; // next/prev pair: by name, the chain along the read
+ struct readinterval *prevInRead;
+} READINTERVAL;
+
+struct arc; // forward declaration: EDGE points at struct arc, which is defined further down in this header
+typedef struct edge // per-edge record; edge_array in extvab.h is declared as EDGE *
+{
+ unsigned int from_vt;
+ unsigned int to_vt;
+ int length;
+ unsigned short cvg: 14; // 14 bits: at most 16383 (MaxEdgeCov is 16000)
+ unsigned short bal_edge: 2; // cvg + bal_edge total 16 bits
+ unsigned short multi: 14;
+ unsigned short deleted : 1; // multi + deleted + flag total 16 bits
+ unsigned short flag : 1;
+ char *seq;
+ READINTERVAL *rv;
+ struct arc *arcs;
+ long long *markers;
+} EDGE;
+
+typedef struct edge_pt // singly linked list element (next) that refers to an EDGE
+{
+ EDGE *edge;
+ struct edge_pt *next; // following list element
+} EDGE_PT;
+
+typedef struct vertex // vertex record holding only its Kmer; vt_array in extvab.h is declared as VERTEX *
+{
+ Kmer kmer;
+} VERTEX;
+
+typedef struct connection // link record towards contigID, chained through three different next pointers
+{
+ unsigned int contigID;
+ int gapLen;
+
+ unsigned short maxGap;
+ unsigned char minGap; // unsigned char: at most 255 with 8-bit chars, narrower than maxGap
+ unsigned char bySmall: 1;
+ unsigned char weakPoint: 1;
+
+ unsigned char weightNotInherit;
+ unsigned char weight; // unsigned char: at most 255 with 8-bit chars
+ unsigned char maxSingleWeight;
+ unsigned char mask : 1; // mask .. singleInScaf: eight 1-bit flags (8 bits in total)
+ unsigned char used : 1;
+ unsigned char weak : 1;
+ unsigned char deleted : 1;
+ unsigned char prevInScaf : 1;
+ unsigned char inherit : 1;
+ unsigned char checking : 1;
+ unsigned char singleInScaf : 1;
+ struct connection *nextInScaf;
+ struct connection *next;
+ struct connection *nextInLookupTable; // presumably the chain used by cntLookupTable (extvab.h) -- TODO confirm
+ int *PE;
+} CONNECT;
+
+typedef struct prearc // singly linked list element (next) naming a target edge number
+{
+ unsigned int to_ed;
+ unsigned int multiplicity;
+ struct prearc *next; // following list element
+} preARC;
+
+typedef struct contig // per-contig record; contig_array in extvab.h is declared as CONTIG *
+{
+ unsigned int from_vt;
+ unsigned int to_vt;
+ unsigned int length;
+ unsigned short indexInScaf;
+ unsigned char cvg; // unsigned char: at most 255 with 8-bit chars
+ unsigned char bal_edge: 2; // 0, 1 or 2
+ unsigned char mask : 1; // bal_edge plus the four 1-bit flags below use 6 bits
+ unsigned char flag : 1;
+ unsigned char multi: 1;
+ unsigned char inSubGraph: 1;
+ char *seq;
+ CONNECT *downwardConnect; // presumably the head of this contig's CONNECT chain -- TODO confirm
+ preARC *arcs;
+ STACK *closeReads; // STACK comes from stack.h, included at the top of this header
+} CONTIG;
+
+typedef struct read_nearby // descriptor of one read: its length, a distance, and where its sequence starts in a dynamic array
+{
+ int len;
+ int dis; // distance to the nearby contig or to the scaffold's start position
+ long long seqStarter; // sequence start position in the dynamic array
+} READNEARBY;
+
+typedef struct annotation // (readID, contigID, pos) triple -- presumably one read placement on a contig; TODO confirm
+{
+ unsigned long long readID;
+ unsigned int contigID;
+ int pos;
+} ANNOTATION;
+
+typedef struct parameter // bundle of a thread id, a hash-table pointer and two signal bytes -- presumably a worker-thread argument; TODO confirm
+{
+ unsigned char threadID; // unsigned char: at most 255 with 8-bit chars
+ void **hash_table;
+ unsigned char *mainSignal;
+ unsigned char *selfSignal;
+} PARAMETER;
+
+typedef struct lightannot // ANNOTATION without the readID field
+{
+ int contigID; // note: int here, whereas ANNOTATION.contigID is unsigned int
+ int pos;
+} LIGHTANNOT;
+
+typedef struct edgepatch // compact edge description: the two end k-mers, a length and a bal_edge value
+{
+ Kmer from_kmer, to_kmer;
+ unsigned int length;
+ char bal_edge; // plain char here; the other structs store bal_edge in a 2-bit field
+} EDGEPATCH;
+
+typedef struct lightctg // reduced contig record: index, length and sequence pointer only
+{
+ unsigned int index;
+ int length; // note: int here, whereas CONTIG.length is unsigned int
+ char *seq;
+} LIGHTCTG;
+
+
+typedef struct arc // arc towards edge to_ed; doubly linked (prev/next) and also chained through nextInLookupTable
+{
+ unsigned int to_ed;
+ unsigned int multiplicity;
+ struct arc *prev;
+ struct arc *next;
+ struct arc *bal_arc; // NOTE(review): presumably the twin arc on the reverse-complement strand -- confirm
+ struct arc *nextInLookupTable; // presumably the chain used by arcLookupTable (extvab.h) -- TODO confirm
+} ARC;
+
+typedef struct arcexist // binary-tree node (left/right) carrying only a Kmer key
+{
+ Kmer kmer;
+ struct arcexist *left; // child links
+ struct arcexist *right;
+} ARCEXIST;
+
+typedef struct lib_info // settings, read cursor and input file lists of one library; lib_array in extvab.h is declared as LIB_INFO *
+{
+ int min_ins; // min_ins/max_ins/avg_ins: presumably insert-size bounds and average -- TODO confirm
+ int max_ins;
+ int avg_ins;
+ int rd_len_cutoff;
+ int reverse;
+ int asm_flag;
+ int map_len;
+ int pair_num_cut;
+ int rank;
+ // indicate which file is next to be read
+ int curr_type;
+ int curr_index;
+
+ // handles of the currently opened files (FILE comes from <stdio.h>, pulled in through darray.h)
+ FILE *fp1;
+ FILE *fp2;
+ boolean f1_start;
+ boolean f2_start;
+ // whether the last read was read1 of a pair
+ int paired; // 0 -- single; 1 -- read1; 2 -- read2;
+
+ // type1: two file-name lists (a1, a2) with their counts
+ char **a1_fname;
+ char **a2_fname;
+ int num_a1_file;
+ int num_a2_file;
+
+ // type2: two file-name lists (q1, q2) with their counts
+ char **q1_fname;
+ char **q2_fname;
+ int num_q1_file;
+ int num_q2_file;
+
+ // type3: one file-name list with its count
+ char **p_fname;
+ int num_p_file; //fasta only
+
+ // type4 & 5: two independent file-name lists (s_a, s_q), each with its own count
+ char **s_a_fname;
+ int num_s_a_file;
+ char **s_q_fname;
+ int num_s_q_file;
+
+} LIB_INFO;
+
+typedef struct ctg4heap // contig id, a distance, and four 1-bit "shut" flags (two for the d-heap, two for the u-heap)
+{
+ unsigned int ctgID;
+ int dis;
+ unsigned char ds_shut4dheap: 1; // ignore downstream connections
+ unsigned char us_shut4dheap: 1; // ignore upstream connections
+ unsigned char ds_shut4uheap: 1; // ignore downstream connections
+ unsigned char us_shut4uheap: 1; // ignore upstream connections
+} CTGinHEAP;
+
+typedef struct ctg4scaf // placement of one contig inside a scaffold
+{
+ unsigned int ctgID;
+ int start;
+ int end; // position in the scaffold
+ unsigned int cutHead : 8; // 8 bits: at most 255
+ unsigned int cutTail : 7; // 7 bits: at most 127 (one bit narrower than cutHead)
+ unsigned int scaftig_start : 1; // is it a scaftig starter
+ unsigned int mask : 1; // is it masked for further operations
+ unsigned int gapSeqLen: 15; // 15 bits: at most 32767; the five bit-fields total 32 bits
+ int gapSeqOffset; // presumably an offset into the gap-sequence dynamic array -- TODO confirm
+} CTGinSCAF;
+
+typedef struct pe_info // one paired-end grade entry; pes in extvab.h is declared as PE_INFO *
+{
+ int insertS; // presumably the insert size of this grade -- TODO confirm
+ long long PE_bound;
+ int rank; // same field name as LIB_INFO.rank
+ int pair_num_cut; // same field name as LIB_INFO.pair_num_cut
+} PE_INFO;
+#endif
diff --git a/fusion/inc/def2.h b/fusion/inc/def2.h
new file mode 100644
index 0000000..e4aa72b
--- /dev/null
+++ b/fusion/inc/def2.h
@@ -0,0 +1,44 @@
+#ifndef _DEF2
+#define _DEF2
+typedef char boolean; // NOTE(review): plain char, so its signedness is implementation-defined
+typedef long long IDnum;
+typedef double Time; // type of the heap key (dfhe_key below, dfh_insertkey in dfib.h)
+typedef long long Coordinate;
+// Fibonacci heaps used mainly in Tour Bus; only dfibheap_el is defined in this header, the others stay opaque here
+typedef struct fibheap FibHeap;
+typedef struct fibheap_el FibHeapNode;
+typedef struct dfibheap DFibHeap;
+typedef struct dfibheap_el DFibHeapNode;
+//Memory manager
+typedef struct block_start // list link that heads MEM_MANAGER.block_list
+{
+ struct block_start *next; // following block
+} BLOCK_START;
+
+typedef struct recycle_mark // list link that heads MEM_MANAGER.recycle_list
+{
+ struct recycle_mark *next; // following recycled entry
+} RECYCLE_MARK;
+
+typedef struct mem_manager // state of the allocator behind createMem_manager()/getItem()/returnItem() (extfunc2.h)
+{
+ BLOCK_START *block_list; // head of the block list
+ int index_in_block; // presumably the next free slot in the current block -- TODO confirm
+ int items_per_block;
+ size_t item_size; // presumably bytes per item (createMem_manager takes a unit_size) -- TODO confirm
+ RECYCLE_MARK *recycle_list; // presumably items handed back through returnItem() -- TODO confirm
+ unsigned long long counter;
+} MEM_MANAGER;
+
+struct dfibheap_el // heap node with a Time key and an unsigned int payload (typedef'd above as DFibHeapNode)
+{
+ int dfhe_degree;
+ boolean dfhe_mark;
+ DFibHeapNode *dfhe_p; // presumably the parent node -- TODO confirm
+ DFibHeapNode *dfhe_child;
+ DFibHeapNode *dfhe_left; // left/right neighbour links
+ DFibHeapNode *dfhe_right;
+ Time dfhe_key;
+ unsigned int dfhe_data; // payload is an unsigned int here; it was formerly declared as void *dfhe_data
+};
+#endif
diff --git a/standardPregraph/inc/dfib.h b/fusion/inc/dfib.h
old mode 100644
new mode 100755
similarity index 52%
copy from standardPregraph/inc/dfib.h
copy to fusion/inc/dfib.h
index d0ad90a..fa96304
--- a/standardPregraph/inc/dfib.h
+++ b/fusion/inc/dfib.h
@@ -1,10 +1,23 @@
/*
- * inc/dfib.h
- *
- * This file is part of SOAPdenovo.
- *
- */
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
/*-
* Copyright 1997, 1998-2003 John-Mark Gurney.
* All rights reserved.
@@ -30,7 +43,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: dfib.h,v 1.8 2007/04/24 12:16:41 zerbino Exp $
+ * $Id: dfib.h,v 1.8 2007/04/24 12:16:41 zerbino Exp $
*
*/
@@ -41,19 +54,19 @@
#include "def2.h" //#include "globals.h"
/* functions for key heaps */
-DFibHeap * dfh_makekeyheap ( void );
-DFibHeapNode * dfh_insertkey ( DFibHeap *, Time, unsigned int );
-Time dfh_replacekey ( DFibHeap *, DFibHeapNode *, Time );
-unsigned int dfh_replacekeydata ( DFibHeap *, DFibHeapNode *, Time, unsigned int );
+DFibHeap *dfh_makekeyheap(void);
+DFibHeapNode *dfh_insertkey(DFibHeap *, Time, unsigned int);
+Time dfh_replacekey(DFibHeap *, DFibHeapNode *, Time);
+unsigned int dfh_replacekeydata(DFibHeap *, DFibHeapNode *, Time, unsigned int);
-unsigned int dfh_extractmin ( DFibHeap * );
-unsigned int dfh_replacedata ( DFibHeapNode *, unsigned int );
-unsigned int dfh_delete ( DFibHeap *, DFibHeapNode * );
-void dfh_deleteheap ( DFibHeap * );
+unsigned int dfh_extractmin(DFibHeap *);
+unsigned int dfh_replacedata(DFibHeapNode *, unsigned int);
+unsigned int dfh_delete(DFibHeap *, DFibHeapNode *);
+void dfh_deleteheap(DFibHeap *);
// DEBUG
-IDnum dfibheap_getSize ( DFibHeap * );
-Time dfibheap_el_getKey ( DFibHeapNode * );
+IDnum dfibheap_getSize(DFibHeap *);
+Time dfibheap_el_getKey(DFibHeapNode *);
// END DEBUG
-#endif /* _FIB_H_ */
+#endif /* _FIB_H_ */
diff --git a/fusion/inc/dfibHeap.h b/fusion/inc/dfibHeap.h
new file mode 100644
index 0000000..ee9f243
--- /dev/null
+++ b/fusion/inc/dfibHeap.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+#ifndef _DFIBHEAP_H_
+#define _DFIBHEAP_H_
+
+DFibHeap *newDFibHeap();
+
+DFibHeapNode *insertNodeIntoDHeap(DFibHeap *heap, Time key, unsigned int node);
+
+Time replaceKeyInDHeap(DFibHeap *heap, DFibHeapNode *node, Time newKey);
+
+unsigned int removeNextNodeFromDHeap(DFibHeap *heap);
+
+void destroyDHeap(DFibHeap *heap);
+
+boolean HasMin(DFibHeap *h);
+
+void replaceValueInDHeap(DFibHeapNode *node, unsigned int newValue);
+
+void *destroyNodeInDHeap(DFibHeapNode *node, DFibHeap *heap);
+
+IDnum getDFibHeapSize(DFibHeap *heap);
+
+Time getKey(DFibHeapNode *node);
+#endif
diff --git a/fusion/inc/dfibpriv.h b/fusion/inc/dfibpriv.h
new file mode 100644
index 0000000..0106493
--- /dev/null
+++ b/fusion/inc/dfibpriv.h
@@ -0,0 +1,97 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+/*-
+ * Copyright 1997, 1999-2003 John-Mark Gurney.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: dfibpriv.h,v 1.8 2007/10/09 09:56:46 zerbino Exp $
+ *
+ */
+
+#ifndef _DFIBPRIV_H_
+#define _DFIBPRIV_H_
+
+//#include "globals.h"
+#include "def2.h"
+
+/*
+ * specific node operations
+ */
+
+static DFibHeapNode *dfhe_newelem(DFibHeap *);
+static void dfhe_insertafter(DFibHeapNode *a, DFibHeapNode *b);
+static inline void dfhe_insertbefore(DFibHeapNode *a, DFibHeapNode *b);
+static DFibHeapNode *dfhe_remove(DFibHeapNode *a);
+
+/*
+ * global heap operations
+ */
+struct dfibheap // heap header (typedef'd as DFibHeap in def2.h)
+{
+ MEM_MANAGER *nodeMemory; // presumably the allocator that supplies the heap nodes -- TODO confirm
+ IDnum dfh_n; // presumably the number of nodes in the heap -- TODO confirm
+ IDnum dfh_Dl;
+ DFibHeapNode **dfh_cons; // presumably scratch array for dfh_consolidate() -- TODO confirm
+ DFibHeapNode *dfh_min; // presumably the node with the smallest key -- TODO confirm
+ DFibHeapNode *dfh_root;
+};
+
+static void dfh_insertrootlist(DFibHeap *, DFibHeapNode *);
+static void dfh_removerootlist(DFibHeap *, DFibHeapNode *);
+static void dfh_consolidate(DFibHeap *);
+static void dfh_heaplink(DFibHeap *h, DFibHeapNode *y, DFibHeapNode *x);
+static void dfh_cut(DFibHeap *, DFibHeapNode *, DFibHeapNode *);
+static void dfh_cascading_cut(DFibHeap *, DFibHeapNode *);
+static DFibHeapNode *dfh_extractminel(DFibHeap *);
+static void dfh_checkcons(DFibHeap *h);
+static int dfh_compare(DFibHeap *h, DFibHeapNode *a, DFibHeapNode *b);
+static int dfh_comparedata(DFibHeap *h, Time key,
+ unsigned int data, DFibHeapNode *b);
+static void dfh_insertel(DFibHeap *h, DFibHeapNode *x);
+
+
+/*
+ * general functions
+ */
+static inline IDnum ceillog2(IDnum a);
+
+#endif /* _DFIBPRIV_H_ */
diff --git a/fusion/inc/extfunc.h b/fusion/inc/extfunc.h
new file mode 100644
index 0000000..aa9bef9
--- /dev/null
+++ b/fusion/inc/extfunc.h
@@ -0,0 +1,209 @@
+/***************************************************************************
+ * Title: extfunc.h
+ * Author: Haixu Tang
+ * Created: Jun. 2002
+ * Last modified: May. 2004
+ *
+ * Copyright (c) 2001-2004 The Regents of the University of California
+ * All Rights Reserved
+ * See file LICENSE for details.
+ ***************************************************************************/
+#include "check.h"
+#include "extfunc2.h"
+extern NODE **seq2nodes_with_pair(char *seqfile, char *outfile);
+extern NODE **prlSeq2nodes_with_pair(char *seqfile, char *outfile);
+extern void readseq1by1(char *src_seq, char *src_name, int *len_seq, FILE *fp, long long num_seq);
+extern void readseqPbyP(char *src_seq, char *src_name, int *insertS, int *len_seq, FILE *fp, long long num_seq);
+extern void nodes2edges_with_pair(NODE **hash_table, EDGE_PT **edge_list, char *outfile);
+extern int findOrInsertOccurenceInNodeTree(Kmer kmer, NODE **T);
+extern NODE *SplayNodeTree(NODE *T, Kmer kmer);
+extern Kmer reverseComplement(Kmer word, int overlap);
+extern Kmer hash_kmer(Kmer kmer);
+extern void link2next(NODE *node, char ch);
+extern unsigned char check_link2next(NODE *node, char ch);
+extern void unlink2next(NODE *node, char ch);
+extern void link2prev(NODE *node, char ch);
+extern unsigned char check_link2prev(NODE *node, char ch);
+extern void unlink2prev(NODE *node, char ch);
+extern int count_link2next(NODE *node);
+extern int count_link2prev(NODE *node);
+extern Kmer nextKmer(Kmer prev, char ch);
+extern Kmer prevKmer(Kmer next, char ch);
+extern long long readseqpar(int *max_len, int *min_leg, int *max_name_len, FILE *fp);
+extern void destroyNodeHash(NODE **hash_table);
+extern void free_edge_list(EDGE_PT *el);
+extern void reverseComplementSeq(char *seq, int len, char *bal_seq);
+extern void free_node_list(NODE_PT *np);
+extern NODE *SplayNodeTree_FILTER(NODE *T, Kmer kmer);
+extern NODE *allocateNode_cvg(Kmer kmer);
+extern int findOrInsertOccurenceInNodeTree_cvg(Kmer kmer, NODE **T);
+extern void free_edge_array(EDGE *ed_array, int ed_num);
+extern void free_lightctg_array(LIGHTCTG *ed_array, int ed_num);
+extern char getCharInTightString(char *tightSeq, int pos);
+extern void writeChar2tightSting(char nt, char *tightSeq, int pos); // NOTE(review): name looks like a misspelling of writeChar2tightString, which this header also declares with the same signature
+extern void short_reads_sum();
+extern void read_one_sequence(FILE *fp, long long *T, char **X);
+extern void output_edges(preEDGE *ed_array, int ed_num, char *outfile);
+extern void read2edge(char *seqfile, NODE **hash_table, char *outfile);
+extern void loadVertex(char *graphfile);
+extern int kmer2vt(Kmer kmer);
+extern void loadEdge(char *graphfile);
+extern boolean loadPath(char *graphfile);
+extern READINTERVAL *allocateRV(int readid, int edgeid);
+extern void createRVmemo();
+extern void dismissRV(READINTERVAL *rv);
+extern void destroyReadIntervMem();
+extern void destroyConnectMem();
+extern void u2uConcatenate();
+extern void unlink2all(NODE *node, NODE **hash_table);
+extern void cutTip(NODE **hash_table);
+extern void output_contig(EDGE *ed_array, unsigned int ed_num, char *outfile, int cut_len);
+extern void printTightString(char *tightSeq, int len);
+extern int roughUniqueness(unsigned int edgeno, char ignore_cvg, char *ignored);
+extern void outputReadPos(char *graphfile, int min_len);
+extern NODE *reverseComplementNode(NODE *node1, NODE **hash_table);
+extern void testSearch();
+extern void print_kmer(FILE *fp, Kmer kmer, char c);
+extern void allpathConcatenate();
+extern void output_updated_edges(char *outfile);
+extern void output_updated_vertex(char *outfile);
+extern void loadUpdatedEdges(char *graphfile);
+extern void loadUpdatedVertex(char *graphfile);
+extern void connectByPE(char *infile);
+extern void output_cntGVZ(char *outfile);
+extern void output_graph(char *outfile);
+extern void removeUnreliable(NODE **hash_table); // parameter name spelled like the other hash_table prototypes in this header
+extern void testLinearC2C();
+extern void output_contig_graph(char *outfile);
+extern void scaffolding(unsigned int cut_len, char *outfile);
+extern int cmp_int(const void *a, const void *b);
+extern CONNECT *allocateCN(unsigned int contigId, int gap);
+extern int recoverRep();
+extern void loadPEgrads(char *infile);
+extern int putInsertS(long long readid, int size, int *currGrads);
+extern int getInsertS(long long readid, int *readlen);
+extern int connectByPE_grad(FILE *fp, int peGrad, char *line);
+extern void PEgradsScaf(char *infile);
+extern void reorderAnnotation(char *infile, char *outfile);
+extern int count_ends(NODE **hash_table);
+extern void output_1edge(preEDGE *edge, FILE *fp);
+extern void prlRead2edge(char *libfile, char *outfile);
+extern int count_edges(NODE **hash_table);
+extern int prlFindOrInsertOccurenceInNodeTree_cvg(Kmer kmer, NODE **T, MEM_MANAGER *node_mem_manager);
+extern void prlDestroyNodeHash(NODE **hash_table);
+extern void annotFileTrans(char *infile, char *outfile);
+extern void prlLoadPath(char *graphfile);
+extern void misCheck(char *infile, char *outfile);
+extern int uniqueLenSearch(unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target);
+extern int cmp_vertex(const void *a, const void *b);
+extern void linkContig2Vts();
+extern int bisearch(VERTEX *vts, int num, Kmer target);
+extern int connectByPE_gradPatch(FILE *fp1, FILE *fp2, int peGrad, char *line1, char *line2);
+extern void scaftiging(char *graphfile, int len_cut);
+extern void gapFilling(char *graphfile, int cut_len);
+extern ARC *getArcBetween(unsigned int from_ed, unsigned int to_ed);
+extern void bubblePinch(double simiCutoff, char *outfile, int M);
+extern void linearConcatenate();
+extern unsigned char setArcMulti(unsigned int from_ed, unsigned int to_ed, unsigned char value);
+extern ARC *allocateArc(unsigned int edgeid);
+extern void cutTipsInGraph(int cutLen, boolean strict);
+extern ARC *deleteArc(ARC *arc_list, ARC *arc);
+extern void compactEdgeArray();
+extern void dismissArc(ARC *arc);
+extern void createArcMemo();
+extern ARC *getArcBetween(unsigned int from_ed, unsigned int to_ed);
+extern ARC *allocateArc(unsigned int edgeid);
+extern void unlink2prevUncertain(NODE *node, char ch, boolean smaller);
+extern char firstCharInKmer(Kmer kmer);
+extern void writeChar2tightString(char nt, char *tightSeq, int pos);
+extern Kmer reverseComplementVerbose(Kmer word, int overlap);
+extern Kmer KmerPlus(Kmer prev, char ch);
+extern void output_heavyArcs(char *outfile);
+extern preARC *allocatePreArc(unsigned int edgeid);
+extern void destroyPreArcMem();
+extern void traceAlongArc(unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route);
+extern void freeContig_array();
+extern void output_scafSeq(char *graphfile, int len_cut);
+extern void putArcInHash(unsigned int from_ed, unsigned int to_ed);
+extern boolean DoesArcExist(unsigned int from_ed, unsigned int to_ed);
+extern void recordArcInHash();
+extern void destroyArcHash();
+extern void removeWeakEdges(int lenCutoff, unsigned int multiCutoff);
+extern void createArcLookupTable();
+extern void deleteArcLookupTable();
+extern void putArc2LookupTable(unsigned int from_ed, ARC *arc);
+extern void removeArcInLookupTable(unsigned int from_ed, unsigned int to_ed);
+extern ARC *arcCount(unsigned int edgeid, unsigned int *num);
+extern void mapFileTrans(char *infile);
+extern void solveReps();
+extern void removeDeadArcs();
+extern void destroyArcMem();
+extern int count_link2prevB(NODE *node);
+extern int count_link2nextB(NODE *node);
+extern void getCntsInFile(char *infile);
+extern void scafByCntInfo(char *infile);
+extern CONNECT *add1Connect(unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit);
+extern void getScaff(char *infile);
+extern void traceAlongMaskedCnt(unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route);
+extern void createPreArcMemManager();
+extern boolean loadPathBin(char *graphfile);
+extern void analyzeTips(NODE **hash_table, char *graphfile);
+extern void recordArcsInLookupTable();
+extern FILE *multiFileRead1seq(char *src_seq, char *src_name, int *len_seq, FILE *fp, FILE *freads);
+extern void multiFileSeqpar(FILE *fp);
+extern long long multiFileParse(int *max_leg, int *min_leg, int *max_name_leg, FILE *fp);
+extern CONNECT *getCntBetween(unsigned int from_ed, unsigned int to_ed);
+extern void createCntMemManager();
+extern void destroyConnectMem();
+extern void createCntLookupTable();
+extern void deleteCntLookupTable();
+extern void putCnt2LookupTable(unsigned int from_c, CONNECT *cnt);
+extern int prlFindOrInsertOccurenceInEdonTree(Kmer kmer, EDON **T, MEM_MANAGER *node_mem_manager);
+extern EDON *SplayEdonTree(EDON *T, Kmer kmer);
+extern void prlDestroyEdonHash(EDON **hash_table);
+extern void prlRead2Ctg(char *seqfile, char *outfile);
+extern void prlLongRead2Ctg(char *libfile, char *outfile);
+extern boolean prlContig2nodes(char *grapfile, int len_cut);
+extern void scan_libInfo(char *libfile);
+extern int getMaxLongReadLen(int num_libs);
+extern void free_libs();
+extern boolean read1seqInLib(char *src_seq, char *src_name, int *len_seq,
+ int *libNo, boolean pair, unsigned char purpose);
+extern NODE **prlEdge2nodes(char *grapfile);
+extern void prlRead2graph(char *libfile, NODE **hash_table, char *outfile);
+extern void save4laterSolve();
+extern void solveRepsAfter();
+extern void free_pe_mem();
+extern void alloc_pe_mem(int gradsCounter);
+extern NODE *searchNodeTree(NODE *T, Kmer kmer);
+extern EDON *searchEdonTree(EDON *T, Kmer kmer);
+extern void prlDestroyPreArcMem();
+extern preARC *prlAllocatePreArc(unsigned int edgeid, MEM_MANAGER *manager);
+extern boolean prlRead2HashTable(char *libfile, char *outfile);
+extern void free_allSets();
+extern void removeSingleTips();
+extern void removeMinorTips();
+extern void kmer2edges(char *outfile);
+extern void output_vertex(char *outfile);
+extern boolean prlRead2HashTable(char *libfile, char *outfile);
+extern void Links2Scaf(char *infile);
+extern void PE2Links(char *infile);
+extern void basicContigInfo(char *infile);
+extern unsigned int getTwinCtg(unsigned int ctg);
+extern boolean isSmallerThanTwin(unsigned int ctg);
+extern boolean isLargerThanTwin(unsigned int ctg);
+extern boolean isSameAsTwin(unsigned int ctg);
+extern boolean loadMarkerBin(char *graphfile);
+extern void readsCloseGap(char *graphfile);
+extern void prlReadsCloseGap(char *graphfile);
+extern void locateReadOnScaf(char *graphfile);
+extern unsigned int getTwinEdge(unsigned int edge);
+extern boolean EdSmallerThanTwin(unsigned int edge);
+extern boolean EdLargerThanTwin(unsigned int edge);
+extern boolean EdSameAsTwin(unsigned int edge);
+extern void removeLowCovEdges(int lenCutoff, unsigned short covCutoff);
+extern int localGraph(READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+ int origOverlap, Kmer *kmerCtg1, Kmer *kmerCtg2,
+ int overlap, DARRAY *gapSeqArray, char *seqCtg1, char *seqCtg2, char *seqGap);
+
+
diff --git a/fusion/inc/extfunc2.h b/fusion/inc/extfunc2.h
new file mode 100644
index 0000000..a4cd5dc
--- /dev/null
+++ b/fusion/inc/extfunc2.h
@@ -0,0 +1,7 @@
+#ifndef _MEM_MANAGER
+#define _MEM_MANAGER
+extern MEM_MANAGER *createMem_manager(int num_items, size_t unit_size);
+extern void *getItem(MEM_MANAGER *mem_Manager);
+extern void returnItem(MEM_MANAGER *mem_Manager, void *);
+extern void freeMem_manager(MEM_MANAGER *mem_Manager);
+#endif
diff --git a/fusion/inc/extvab.h b/fusion/inc/extvab.h
new file mode 100644
index 0000000..7bd1d7f
--- /dev/null
+++ b/fusion/inc/extvab.h
@@ -0,0 +1,92 @@
+/***************************************************************************
+ * Title: extvab.h
+ * Author: Hongmei Zhu
+ * Created: Jun. 2007
+ * Last modified: May. 2009
+ *
+ * All Rights Reserved
+ * See file LICENSE for details.
+ ***************************************************************************/
+/*** global variables ****/
+extern int overlaplen;
+extern int inGraph;
+extern long long n_ban;
+extern Kmer WORDFILTER;
+extern boolean globalFlag;
+extern int thrd_num;
+
+extern int verbosity;
+extern char verboseStr[verboseBufSize];
+
+/**** reads info *****/
+extern long long n_solexa;
+extern long long prevNum;
+extern int ins_size_var;
+extern PE_INFO *pes;
+extern int maxReadLen;
+extern int maxReadLen4all;
+extern int minReadLen;
+extern int maxNameLen;
+extern int num_libs;
+extern LIB_INFO *lib_array;
+extern int libNo;
+extern long long readNumBack;
+extern int gradsCounter;
+/*** used for pregraph *****/
+extern MEM_MANAGER *prearc_mem_manager; //also used in scaffolding
+extern MEM_MANAGER **preArc_mem_managers;
+extern boolean deLowKmer;
+extern boolean deLowEdge;
+extern KmerSet **KmerSets; // also used in mapping
+extern KmerSet **KmerSetsPatch;
+
+extern spcKmerSet *spcSet;
+
+/**** used for contiging ****/
+extern boolean repsTie;
+extern long long arcCounter;
+extern unsigned int num_ed;
+extern unsigned int num_ed_limit;
+extern unsigned int extraEdgeNum;
+extern EDGE *edge_array;
+extern VERTEX *vt_array;
+extern MEM_MANAGER *rv_mem_manager;
+extern MEM_MANAGER *arc_mem_manager;
+extern unsigned int num_vt;
+extern int len_bar;
+extern ARC **arcLookupTable;
+extern long long *markersArray;
+/***** used for scaffolding *****/
+extern MEM_MANAGER *cn_mem_manager;
+extern unsigned int num_ctg;
+extern unsigned int *index_array;
+extern CONTIG *contig_array;
+extern int lineLen;
+extern int weakPE;
+extern long long newCntCounter;
+extern CONNECT **cntLookupTable;
+extern unsigned int ctg_short;
+extern int cvgAvg;
+extern boolean orig2new;
+/**** used for gapFilling ****/
+extern DARRAY *readSeqInGap;
+extern DARRAY *gapSeqDarray;
+extern DARRAY **darrayBuf;
+extern int fillGap;
+/**** used for searchPath *****/
+extern int maxSteps;
+extern int num_trace;
+extern unsigned int **found_routes;
+extern unsigned int *so_far;
+extern int max_n_routes;
+extern boolean maskRep;
+extern int GLDiff;
+extern int initKmerSetSize;
+extern char *shortrdsfile;
+extern char *graphfile;
+extern double OverlapPercent ;
+extern double ConflPercent ;
+extern double close_threshold;
+extern int bund_threshold;
+extern char *ctg_file;
+//extern boolean large_kmer;
diff --git a/fusion/inc/fib.h b/fusion/inc/fib.h
new file mode 100755
index 0000000..40ac9d3
--- /dev/null
+++ b/fusion/inc/fib.h
@@ -0,0 +1,81 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+/*-
+ * Copyright 1997, 1998-2003 John-Mark Gurney.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: fib.h,v 1.9 2007/04/24 12:16:41 zerbino Exp $
+ *
+ */
+
+#ifndef _FIB_H_
+#define _FIB_H_
+
+//#include "globals.h"
+#include <stdio.h>
+#include "def2.h"
+
+typedef Coordinate(*voidcmp) (unsigned int , unsigned int); /* comparison callback type: receives two unsigned int data values and returns a Coordinate */
+
+/* functions for key heaps: every node is inserted with a Coordinate key and
+ * an unsigned int data value (see the fh_insertkey() parameter list) */
+boolean fh_isempty(FibHeap *);
+FibHeap *fh_makekeyheap(void);
+FibHeapNode *fh_insertkey(FibHeap *, Coordinate, unsigned int);
+Coordinate fh_minkey(FibHeap *);
+Coordinate fh_replacekey(FibHeap *, FibHeapNode *, Coordinate);
+unsigned int fh_replacekeydata(FibHeap *, FibHeapNode *, Coordinate, unsigned int);
+
+/* functions for heaps that store only unsigned int data; the ordering is
+ * presumably the voidcmp callback installed with fh_setcmp() - confirm in fib.c */
+FibHeap *fh_makeheap(void);
+voidcmp fh_setcmp(FibHeap *, voidcmp);
+unsigned int fh_setneginf(FibHeap *, unsigned int);
+FibHeapNode *fh_insert(FibHeap *, unsigned int);
+
+/* shared functions (declared for both kinds of heap) */
+unsigned int fh_extractmin(FibHeap *);
+unsigned int fh_min(FibHeap *);
+unsigned int fh_replacedata(FibHeapNode *, unsigned int);
+unsigned int fh_delete(FibHeap *, FibHeapNode *);
+void fh_deleteheap(FibHeap *);
+FibHeap *fh_union(FibHeap *, FibHeap *);
+
+#endif /* _FIB_H_ */
diff --git a/fusion/inc/fibHeap.h b/fusion/inc/fibHeap.h
new file mode 100644
index 0000000..adf0c7d
--- /dev/null
+++ b/fusion/inc/fibHeap.h
@@ -0,0 +1,43 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+#ifndef _FIBHEAP_H_
+#define _FIBHEAP_H_
+
+FibHeap *newFibHeap(); /* NOTE(review): empty parentheses give no prototype in C; (void) would let the compiler reject stray arguments */
+
+FibHeapNode *insertNodeIntoHeap(FibHeap *heap, Coordinate key,
+ unsigned int node); /* `node` is the unsigned int value stored under `key`; the returned handle is what the replace/destroy calls below take */
+
+Coordinate minKeyOfHeap(FibHeap *heap);
+
+Coordinate replaceKeyInHeap(FibHeap *heap, FibHeapNode *node,
+ Coordinate newKey);
+
+void replaceValueInHeap(FibHeapNode *node, unsigned int newValue);
+
+unsigned int removeNextNodeFromHeap(FibHeap *heap);
+
+void *destroyNodeInHeap(FibHeapNode *node, FibHeap *heap); /* NOTE(review): declared to return void *, not void; what the pointer means is not visible in this header */
+
+void destroyHeap(FibHeap *heap);
+
+boolean IsHeapEmpty(FibHeap *heap);
+#endif
diff --git a/fusion/inc/fibpriv.h b/fusion/inc/fibpriv.h
new file mode 100644
index 0000000..1e88bf2
--- /dev/null
+++ b/fusion/inc/fibpriv.h
@@ -0,0 +1,112 @@
+/*
+Copyright 2007, 2008 Daniel Zerbino (zerbino at ebi.ac.uk)
+
+ This file is part of Velvet.
+
+ Velvet is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ Velvet is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Velvet; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+/*-
+ * Copyright 1997, 1999-2003 John-Mark Gurney.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: fibpriv.h,v 1.10 2007/10/09 09:56:46 zerbino Exp $
+ *
+ */
+
+#ifndef _FIBPRIV_H_
+#define _FIBPRIV_H_
+
+#include "def2.h"
+
+/*
+ * specific node operations
+ */
+struct fibheap_el
+{
+ int fhe_degree; /* presumably the number of children of this node - TODO confirm in fib.c */
+ boolean fhe_mark;
+ FibHeapNode *fhe_p; /* fhe_p, fhe_child, fhe_left and fhe_right link a node to other nodes; */
+ FibHeapNode *fhe_child; /* the names suggest parent, child and sibling-list neighbours - confirm in fib.c */
+ FibHeapNode *fhe_left;
+ FibHeapNode *fhe_right;
+ Coordinate fhe_key; /* key of the node; same type as the key argument of fh_insertkey() */
+ unsigned int fhe_data; /* data value carried by the node */
+};
+
+static FibHeapNode *fhe_newelem(struct fibheap *); /* NOTE: these are static prototypes in a header, so every file that includes it must provide its own definitions */
+static void fhe_initelem(FibHeapNode *);
+static void fhe_insertafter(FibHeapNode *a, FibHeapNode *b);
+static inline void fhe_insertbefore(FibHeapNode *a, FibHeapNode *b);
+static FibHeapNode *fhe_remove(FibHeapNode *a);
+
+/*
+ * global heap operations
+ */
+struct fibheap
+{
+ Coordinate(*fh_cmp_fnct) (unsigned int, unsigned int); /* same signature as the voidcmp typedef in fib.h */
+ MEM_MANAGER *nodeMemory; /* presumably the allocator that nodes are drawn from - confirm in fib.c */
+ IDnum fh_n;
+ IDnum fh_Dl;
+ FibHeapNode **fh_cons;
+ FibHeapNode *fh_min;
+ FibHeapNode *fh_root;
+ unsigned int fh_neginf; /* value handled by fh_setneginf() in fib.h, judging by the name - TODO confirm */
+ boolean fh_keys: 1; /* one-bit flag; presumably set for heaps created by fh_makekeyheap() - TODO confirm */
+};
+
+static void fh_initheap(FibHeap *); /* NOTE: static prototypes in a header - definitions must live in each including file */
+static void fh_insertrootlist(FibHeap *, FibHeapNode *);
+static void fh_removerootlist(FibHeap *, FibHeapNode *);
+static void fh_consolidate(FibHeap *);
+static void fh_heaplink(FibHeap *h, FibHeapNode *y, FibHeapNode *x);
+static void fh_cut(FibHeap *, FibHeapNode *, FibHeapNode *);
+static void fh_cascading_cut(FibHeap *, FibHeapNode *);
+static FibHeapNode *fh_extractminel(FibHeap *);
+static void fh_checkcons(FibHeap *h);
+static void fh_destroyheap(FibHeap *h);
+static int fh_compare(FibHeap *h, FibHeapNode *a, FibHeapNode *b);
+static int fh_comparedata(FibHeap *h, Coordinate key,
+ unsigned int data, FibHeapNode *b);
+static void fh_insertel(FibHeap *h, FibHeapNode *x);
+
+/*
+ * general functions
+ */
+static inline IDnum ceillog2(IDnum a);
+
+#endif /* _FIBPRIV_H_ */
diff --git a/fusion/inc/general.h b/fusion/inc/general.h
new file mode 100644
index 0000000..8e0e380
--- /dev/null
+++ b/fusion/inc/general.h
@@ -0,0 +1,89 @@
+/*
+ * Filename: general.h
+ *
+ *
+ * Description:
+ * Basic functions
+ *
+ * Created on: Feb 8, 2010
+ * Author: Ruibang Luo, BGI
+ *
+ * History:
+ * 1.
+ */
+
+#pragma once
+#ifndef GENERAL_H_AQUA_
+#define GENERAL_H_AQUA_
+
+#include<unistd.h>
+
+//Useful Variables*************************************************************
+#ifndef _GNU_SOURCE /* NOTE(review): a feature-test macro only takes effect when defined before the first system header, and <unistd.h> is already included above */
+#define _GNU_SOURCE
+#endif
+#define FN_SIZE 2048 /* presumably the size of file-name buffers - confirm at the use sites */
+//*****************************************************************************
+
+//Types************************************************************************
+typedef unsigned int uint; /* short aliases for the unsigned integer types */
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned long ulong;
+typedef unsigned long long ulonglong;
+
+typedef unsigned char BYTE; /* BYTE/WORD/DWORD aliases; actual widths follow the platform's char/short/int */
+typedef unsigned short WORD;
+typedef unsigned int DWORD;
+
+typedef unsigned char u8_t; /* uN_t names assume 8/16/32/64-bit char/short/int/long long - not enforced here */
+typedef unsigned short u16_t;
+typedef unsigned int u32_t;
+typedef unsigned long long u64_t;
+
+typedef char *chptr; /* NOTE: hides a pointer behind a typedef */
+
+//*****************************************************************************
+
+//Debugging********************************************************************
+//Verbose system
+//Verbosity should be separated into 4 levels: 0, 1, 2, 3
+#define VERBOSITY_BOTTOM 0
+#define VERBOSITY_TOP 4
+int ModifyVerbosity(const int);
+#define verboseBufSize 16384
+/* ModVerboseStrAndVerbose(level, fmt, ...): when (verbosity >> level) is
+ * non-zero, format the message into verboseStr (at most verboseBufSize bytes)
+ * and print it to stderr prefixed with the calling function's name.
+ * The test is a right shift, not a comparison: with verbosity == 3 the
+ * levels 0 and 1 print and level 2 does not.
+ * The expansion uses the identifiers `verbosity` and `verboseStr` from the
+ * scope of the call site, and it is a bare { } block rather than
+ * do { } while(0), so "if(x) die(...); else ..." does not parse. */
+#define ModVerboseStrAndVerbose(level, ...) \
+ {\
+ if(verbosity >> level)\
+ {\
+ snprintf(verboseStr, verboseBufSize, ##__VA_ARGS__);\
+ fprintf(stderr,"[%s]:%s\n",__FUNCTION__,verboseStr);\
+ }\
+ }
+#define mvnv(level, ...) ModVerboseStrAndVerbose(level, ##__VA_ARGS__) /* short alias */
+#define die(...) /* log at level 0 (silent when verbosity is 0), then exit(EXIT_FAILURE) */ \
+ {\
+ ModVerboseStrAndVerbose(0, ##__VA_ARGS__);\
+ fprintf(stderr,"Program terminated.\n");\
+ exit(EXIT_FAILURE);\
+ }
+#define sigdie(sig, ...) /* like die(), but exits with status `sig` */ \
+ {\
+ ModVerboseStrAndVerbose(0, ##__VA_ARGS__);\
+ fprintf(stderr,"Program terminated.\n");\
+ exit(sig);\
+ }
+#define perrdie(...) /* like die(), and also calls perror("") before exiting */ \
+ {\
+ ModVerboseStrAndVerbose(0, ##__VA_ARGS__);\
+ perror("");\
+ fprintf(stderr,"Program terminated.\n");\
+ exit(EXIT_FAILURE);\
+ }
+#define mk /* debug marker: prints the current function name and line to stderr */ \
+ {\
+ fprintf(stderr, "DBG Marker @ %s:%d\n", __FUNCTION__, __LINE__);\
+ }
+
+#endif
diff --git a/fusion/inc/global.h b/fusion/inc/global.h
new file mode 100644
index 0000000..ff7f9c0
--- /dev/null
+++ b/fusion/inc/global.h
@@ -0,0 +1,74 @@
+int overlaplen = 25; /* NOTE: this header DEFINES the program-wide globals (there is no include guard); the matching extern declarations are kept separately, so include it in exactly one translation unit */
+int verbosity = 3; /* read by the logging macros in general.h */
+char verboseStr[verboseBufSize]; /* scratch buffer for the logging macros; verboseBufSize comes from general.h */
+int inGraph;
+long long n_ban;
+long long n_solexa = 0;
+long long prevNum = 0;
+int ins_size_var = 20;
+PE_INFO *pes = NULL; /* allocated by alloc_pe_mem(), released by free_pe_mem() in lib.c */
+MEM_MANAGER *rv_mem_manager = NULL;
+MEM_MANAGER *cn_mem_manager = NULL;
+MEM_MANAGER *arc_mem_manager = NULL;
+unsigned int num_vt = 0;
+unsigned int **found_routes = NULL;
+unsigned int *so_far = NULL;
+int max_n_routes = 10;
+int num_trace;
+Kmer WORDFILTER; /* mask applied by nextKmer() in kmer.c after shifting */
+unsigned int num_ed = 0;
+unsigned int num_ctg = 0;
+unsigned int num_ed_limit;
+unsigned int extraEdgeNum;
+EDGE *edge_array = NULL;
+VERTEX *vt_array = NULL;
+unsigned int *index_array = NULL;
+CONTIG *contig_array = NULL;
+int lineLen;
+int len_bar = 100;
+int weakPE = 3;
+int fillGap = 0;
+boolean globalFlag;
+long long arcCounter;
+MEM_MANAGER *prearc_mem_manager = NULL;
+MEM_MANAGER **preArc_mem_managers = NULL;
+int maxReadLen = 0; /* set from the "max_rd_len" key by scan_libInfo() in lib.c */
+int maxReadLen4all = 0;
+int minReadLen = 0;
+int maxNameLen = 0;
+ARC **arcLookupTable = NULL;
+long long *markersArray = NULL;
+boolean deLowKmer = 0;
+boolean deLowEdge = 1;
+long long newCntCounter;
+boolean repsTie = 0;
+CONNECT **cntLookupTable = NULL;
+int num_libs = 0; /* number of [LIB] sections, counted by scan_libInfo() */
+LIB_INFO *lib_array = NULL; /* one entry per [LIB] section; owned by scan_libInfo()/free_libs() */
+int libNo = 0;
+long long readNumBack;
+int gradsCounter;
+unsigned int ctg_short = 0;
+int thrd_num = 8;
+int cvgAvg = 0;
+KmerSet **KmerSets = NULL;
+KmerSet **KmerSetsPatch = NULL;
+
+spcKmerSet *spcSet = NULL;
+
+DARRAY *readSeqInGap = NULL;
+DARRAY *gapSeqDarray = NULL;
+DARRAY **darrayBuf;
+boolean orig2new;
+int maxSteps;
+boolean maskRep = 1;
+int GLDiff = 50;
+int initKmerSetSize = 0;
+char *shortrdsfile;
+char *graphfile;
+double OverlapPercent = 0.05;
+double ConflPercent = 0.05;
+double close_threshold = 0.1;
+int bund_threshold = 5;
+char *ctg_file = NULL;
+//boolean large_kmer=0;
diff --git a/fusion/inc/newhash.h b/fusion/inc/newhash.h
new file mode 100644
index 0000000..4dcbd47
--- /dev/null
+++ b/fusion/inc/newhash.h
@@ -0,0 +1,122 @@
+#ifndef __NEW_HASH_RJ
+#define __NEW_HASH_RJ
+
+#ifndef K_LOAD_FACTOR
+#define K_LOAD_FACTOR 0.75
+#endif
+/* Link coverage is packed as four counters of EDGE_BIT_SIZE (6) bits each,
+ * selected by idx 0..3, inside l_links / r_links.  A 6-bit counter holds at
+ * most 63, which is both MAX_KMER_COV and EDGE_XOR_MASK; the four counters
+ * together occupy the 24 bits of LINKS_BITS. */
+#define MAX_KMER_COV 63
+#define EDGE_BIT_SIZE 6
+#define EDGE_XOR_MASK 0x3FU
+#define LINKS_BITS 0x00FFFFFFU
+
+#define get_kmer_seq(mer) ((mer).seq)
+#define set_kmer_seq(mer, val) ((mer).seq = val)
+
+#define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+#define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+#define get_kmer_left_covs(mer) (get_kmer_left_cov(mer, 0) + get_kmer_left_cov(mer, 1) + get_kmer_left_cov(mer, 2) + get_kmer_left_cov(mer, 3))
+
+#define get_kmer_right_cov(mer, idx) (((mer).r_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+#define set_kmer_right_cov(mer, idx, val) ((mer).r_links = ((mer).r_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+#define get_kmer_right_covs(mer) (get_kmer_right_cov(mer, 0) + get_kmer_right_cov(mer, 1) + get_kmer_right_cov(mer, 2) + get_kmer_right_cov(mer, 3))
+
+/* Slot state flags: each flags[] word covers 16 slots (idx >> 4) with two
+ * bits per slot ((idx & 0x0f) << 1); bit 0 is the "null" flag and bit 1 the
+ * "del" flag.  is_kmer_entity_null yields 0 or 1, is_kmer_entity_del yields
+ * 0 or 2, and exists_kmer_entity is true only when both bits are clear. */
+#define is_kmer_entity_null(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x01)
+#define is_kmer_entity_del(flags, idx) ((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x02)
+#define set_kmer_entity_null(flags, idx) ((flags)[(idx)>>4] |= (0x01u<<(((idx)&0x0f)<<1)))
+#define set_kmer_entity_del(flags, idx) ((flags)[(idx)>>4] |= (0x02u<<(((idx)&0x0f)<<1)))
+#define clear_kmer_entity_null(flags, idx) ((flags)[(idx)>>4] &= ~(0x01u<<(((idx)&0x0f)<<1)))
+#define clear_kmer_entity_del(flags, idx) ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1)))
+#define exists_kmer_entity(flags, idx) (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03))
+
+
+typedef struct kmer_st
+{
+ Kmer seq;
+ ubyte4 l_links; // serves as edgeID after make_edge
+ ubyte4 r_links: 4 * EDGE_BIT_SIZE; /* 24 bits: four 6-bit counters, see get_kmer_right_cov() */
+ ubyte4 linear: 1; /* the flag fields from here to inEdge add up to 8 bits, so with r_links they total 32 bits */
+ ubyte4 deleted: 1;
+ ubyte4 checked: 1;
+ ubyte4 single: 1;
+ ubyte4 twin: 2;
+ ubyte4 inEdge: 2;
+} kmer_t;
+
+typedef struct kmerSet_st
+{
+ kmer_t *array; /* slot storage */
+ ubyte4 *flags; /* per-slot state bits, accessed through the *_kmer_entity_* macros */
+ ubyte8 size; /* presumably the number of slots in array - TODO confirm in newhash.c */
+ ubyte8 count; /* presumably the number of occupied slots - TODO confirm */
+ ubyte8 max; /* presumably the occupancy limit derived from load_factor - TODO confirm */
+ double load_factor; /* NOTE: stored as double although init_kmerset() takes a float */
+ ubyte8 iter_ptr;
+
+ ubyte8 searchCnt; /* the remaining fields are counters; their exact meaning is not visible in this header */
+ ubyte8 foundCnt;
+ ubyte8 delCnt;
+ ubyte8 searchSpcSeedCnt;
+ ubyte8 getSpcSeedCnt;
+ ubyte8 levelGet[3];
+
+} KmerSet;
+
+typedef struct kmer_pt
+{
+ kmer_t *node; /* table entry this record refers to */
+ Kmer kmer;
+ boolean isSmaller; /* NOTE(review): presumably tells which of a k-mer and its reverse complement is stored - confirm with callers */
+ struct kmer_pt *next; /* singly linked list */
+} KMER_PT;
+
+//////////////////////////////////////////////////////////////// spaced seed
+
+typedef struct spaced_base
+{
+ ubyte2 spaced_bases: 14; /* 14-bit field; the encoding is not visible in this header */
+ //ubyte2 repeat:1;
+ //ubyte4 edgeID;
+ kmer_t *large_kmer; /* k-mer entry this record points back to */
+ struct spaced_base *next; /* singly linked list, headed by spcKmer.start */
+} spcBase;
+
+typedef struct spaced_kmer
+{
+ Kmer seq;
+ struct spaced_base *start; /* head of the spcBase list */
+ ubyte4 spaced_base_num; /* presumably the length of that list - TODO confirm in newhash.c */
+} spcKmer;
+
+typedef struct spaced_kmer_set
+{
+ spcKmer *array; /* same array/flags/size/count/max/load_factor layout as the leading fields of KmerSet */
+ ubyte4 *flags;
+ ubyte8 size;
+ ubyte8 count;
+ ubyte8 max;
+ double load_factor;
+} spcKmerSet;
+
+extern spcKmerSet *init_spckmerset(ubyte8 init_size, float load_factor);
+extern void buildSpcKmerSet(KmerSet *set, spcKmerSet *spaced_kset);
+extern int search_spckmerset(spcKmerSet *set, ubyte8 seq, spcKmer **rs); /* NOTE(review): takes ubyte8 seq while put_spckmerset() takes a Kmer - confirm the two types match */
+extern int put_spckmerset(spcKmerSet *set, Kmer spc_kmer, ubyte2 spaced_bases, kmer_t *node);
+
+//////////////////////////////////////////////////////////////// spaced seed END
+
+extern KmerSet *init_kmerset(ubyte8 init_size, float load_factor); /* load_factor is a float here but a double in KmerSet */
+extern int search_kmerset(KmerSet *set, ubyte8 seq, kmer_t **rs); /* on a hit the entry is presumably returned through *rs - confirm in newhash.c */
+extern int put_kmerset(KmerSet *set, ubyte8 seq, ubyte left, ubyte right, kmer_t **kmer_p);
+extern byte8 count_kmerset(KmerSet *set);
+extern void free_Sets(KmerSet **KmerSets, int num); /* the parameter name shadows the global KmerSets */
+extern void free_kmerset(KmerSet *set);
+extern void dislink2nextUncertain(kmer_t *node, char ch, boolean smaller);
+extern void dislink2prevUncertain(kmer_t *node, char ch, boolean smaller);
+
+extern int count_branch2prev(kmer_t *node);
+extern int count_branch2next(kmer_t *node);
+extern char firstCharInKmer(Kmer kmer); /* defined in kmer.c */
+
+#endif
diff --git a/fusion/inc/nuc.h b/fusion/inc/nuc.h
new file mode 100644
index 0000000..a4dbe25
--- /dev/null
+++ b/fusion/inc/nuc.h
@@ -0,0 +1,14 @@
+/***************************************************************************
+ * Title: nuc.h
+ * Author: Haixu Tang
+ * Created: Jun. 2002
+ * Last modified: May. 2004
+ *
+ * Copyright (c) 2001-2004 The Regents of the University of California
+ * All Rights Reserved
+ * See file LICENSE for details.
+ ***************************************************************************/
+int total_nuc = 16; /* number of symbols listed in na_name; NOTE: both objects are defined (not just declared) in this header */
+char na_name[17] = {'g', 'a', 't', 'c', /* 16 initialisers for 17 elements, so the last element is zero */
+ 'n', 'r', 'y', 'w', 's', 'm', 'k', 'h', 'b', 'v', 'd', 'x'
+ };
diff --git a/fusion/inc/stack.h b/fusion/inc/stack.h
new file mode 100644
index 0000000..0fb0b07
--- /dev/null
+++ b/fusion/inc/stack.h
@@ -0,0 +1,35 @@
+#ifndef __STACK__
+#define __STACK__
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+typedef struct block_starter
+{
+ struct block_starter *prev; /* blocks form a doubly linked list */
+ struct block_starter *next;
+} BLOCK_STARTER;
+
+typedef struct stack
+{
+ BLOCK_STARTER *block_list;
+ int index_in_block;
+ int items_per_block;
+ int item_c; /* presumably the current item count - TODO confirm in stack.c */
+ size_t item_size;
+ BLOCK_STARTER *block_backup; /* the three *_backup fields mirror block_list, index_in_block and item_c; */
+ int index_backup; /* presumably saved by stackBackup() and restored by stackRecover() - confirm in stack.c */
+ int item_c_backup;
+} STACK;
+
+void stackBackup(STACK *astack);
+void stackRecover(STACK *astack);
+void *stackPush(STACK *astack); /* push and pop both return a void * rather than taking or returning an item by value */
+void *stackPop(STACK *astack);
+void freeStack(STACK *astack);
+void emptyStack(STACK *astack);
+STACK *createStack(int num_items, size_t unit_size);
+
+
+#endif
diff --git a/fusion/inc/stdinc.h b/fusion/inc/stdinc.h
new file mode 100755
index 0000000..9700d5d
--- /dev/null
+++ b/fusion/inc/stdinc.h
@@ -0,0 +1,40 @@
+/***************************************************************************
+
+ * Title: stdinc.h
+
+ * Author: Haixu Tang
+
+ * Created: Jun. 2002
+
+ * Last modified: May. 2004
+
+ *
+
+ * Copyright (c) 2001-2004 The Regents of the University of California
+
+ * All Rights Reserved
+
+ * See file LICENSE for details.
+
+ ***************************************************************************/
+
+#include <limits.h>
+
+#include <stdio.h>
+
+#include <math.h>
+
+#include <unistd.h>
+
+#include <string.h>
+
+#include <stdlib.h>
+
+#include <time.h>
+
+#include <pthread.h>
+
+#include "def.h"
+
+#include "general.h"
+
diff --git a/fusion/inc/types.h b/fusion/inc/types.h
new file mode 100755
index 0000000..fdeb4f6
--- /dev/null
+++ b/fusion/inc/types.h
@@ -0,0 +1,14 @@
+#ifndef __TYPES_RJ
+#define __TYPES_RJ
+
+typedef unsigned long long ubyte8; /* the digit in each name is the intended size in bytes; the real width depends on the platform ABI and is not checked here */
+typedef unsigned int ubyte4;
+typedef unsigned short ubyte2;
+typedef unsigned char ubyte;
+
+typedef long long byte8; /* signed counterparts */
+typedef int byte4;
+typedef short byte2;
+typedef char byte; /* NOTE: plain char, whose signedness is implementation-defined */
+
+#endif
diff --git a/fusion/kmer.c b/fusion/kmer.c
new file mode 100644
index 0000000..cdd3ff8
--- /dev/null
+++ b/fusion/kmer.c
@@ -0,0 +1,145 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+static unsigned char filter_array[8] = { (unsigned char) 1, ((unsigned char) 1) << 1, ((unsigned char) 1) << 2, ((unsigned char) 1) << 3, ((unsigned char) 1) << 4, ((unsigned char) 1) << 5, ((unsigned char) 1) << 6, ((unsigned char) 1) << 7}; /* filter_array[i] == 1 << i; the *2next functions index it with ch, the *2prev functions with ch + 4 */
+
+
+/* Set bit `ch` of node->links; when that bit is already set, the repeat is
+ * recorded in node->linksB instead. */
+void link2next(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[(int)ch];
+
+    if(node->links & bit)
+    {
+        node->linksB |= bit;
+        return;
+    }
+
+    node->links |= bit;
+}
+
+/* Return bit `ch` of node->linksB: zero when clear, the bit mask when set. */
+unsigned char check_linkB2next(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[(int)ch];
+
+    return node->linksB & bit;
+}
+
+/* Return bit `ch` of node->links: zero when clear, the bit mask when set. */
+unsigned char check_link2next(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[(int)ch];
+
+    return node->links & bit;
+}
+
+/* Clear bit `ch` of node->links (node->linksB is left untouched). */
+void unlink2next(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[(int)ch];
+
+    node->links &= ~bit;
+}
+
+
+/* Set bit `ch + 4` of node->links; when that bit is already set, the repeat
+ * is recorded in node->linksB instead. */
+void link2prev(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[ch + 4];
+
+    if(node->links & bit)
+    {
+        node->linksB |= bit;
+        return;
+    }
+
+    node->links |= bit;
+}
+
+/* Return bit `ch + 4` of node->linksB: zero when clear, the bit mask when set. */
+unsigned char check_linkB2prev(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[ch + 4];
+
+    return node->linksB & bit;
+}
+
+/* Return bit `ch + 4` of node->links: zero when clear, the bit mask when set. */
+unsigned char check_link2prev(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[ch + 4];
+
+    return node->links & bit;
+}
+
+/* Clear bit `ch + 4` of node->links (node->linksB is left untouched). */
+void unlink2prev(NODE *node, char ch)
+{
+    const unsigned char bit = filter_array[ch + 4];
+
+    node->links &= ~bit;
+}
+
+/* Count how many of the four lowest bits of `bits` are set. */
+static int countLowFourBits(unsigned char bits)
+{
+    int total = 0;
+    int remaining = 4;
+
+    while(remaining-- > 0)
+    {
+        if(bits & 0x01)
+            total++;
+
+        bits >>= 1;
+    }
+
+    return total;
+}
+
+/* Number of bits set in the low nibble of node->links. */
+int count_link2next(NODE *node)
+{
+    return countLowFourBits((unsigned char)node->links);
+}
+
+/* Number of bits set in the low nibble of node->linksB. */
+int count_link2nextB(NODE *node)
+{
+    return countLowFourBits((unsigned char)node->linksB);
+}
+
+/* Number of bits set in the high nibble of node->linksB. */
+int count_link2prevB(NODE *node)
+{
+    return countLowFourBits((unsigned char)node->linksB >> 4);
+}
+
+/* Number of bits set in the high nibble of node->links. */
+int count_link2prev(NODE *node)
+{
+    return countLowFourBits((unsigned char)node->links >> 4);
+}
+
+/* Append base code `ch` to `prev`: shift left by two bits and add ch.
+ * No mask is applied, unlike nextKmer(). */
+Kmer KmerPlus(Kmer prev, char ch)
+{
+    Kmer shifted = prev << 2;
+
+    return shifted + ch;
+}
+/* Shift `prev` left by two bits, mask the result with the global WORDFILTER,
+ * then add base code `ch`. */
+Kmer nextKmer(Kmer prev, char ch)
+{
+    Kmer shifted = prev << 2;
+    Kmer masked = shifted & WORDFILTER;
+
+    return masked + ch;
+}
+
+/* Shift `next` right by two bits and add base code `ch` at bit position
+ * 2 * (overlaplen - 1). */
+Kmer prevKmer(Kmer next, char ch)
+{
+    const int topShift = 2 * (overlaplen - 1);
+    Kmer tail = next >> 2;
+    Kmer head = ((Kmer)ch) << topShift;
+
+    return tail + head;
+}
+
+/* Return `kmer` shifted right by 2 * (overlaplen - 1) bits, converted to
+ * char.  The value is not masked with 3. */
+char firstCharInKmer(Kmer kmer)
+{
+    const int topShift = 2 * (overlaplen - 1);
+
+    return (char)(kmer >> topShift);
+}
+
diff --git a/fusion/lib.c b/fusion/lib.c
new file mode 100644
index 0000000..d9e27fa
--- /dev/null
+++ b/fusion/lib.c
@@ -0,0 +1,438 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+static char tabs[2][1024]; /* filled by splitColumn(): tabs[0] and tabs[1] hold the first two fields of the last line it parsed (the key and the value in scan_libInfo()) */
+
+/* Copy the first two fields of `line` into tabs[0] and tabs[1].
+ * A field is a maximal run of characters in the range 32..126 other than
+ * '='; blanks are therefore part of a field.  Returns 1 when two fields
+ * were found and 0 otherwise. */
+static boolean splitColumn(char *line)
+{
+    int total = strlen(line);
+    int pos = 0;
+    int filled = 0;
+
+    while(pos < total && filled < 2)
+    {
+        int out = 0;
+
+        /* skip one separator character */
+        if(line[pos] < 32 || line[pos] > 126 || line[pos] == '=')
+        {
+            pos++;
+            continue;
+        }
+
+        /* copy the field up to the next separator or the end of the line */
+        do
+        {
+            tabs[filled][out++] = line[pos++];
+        }
+        while(pos < total && line[pos] >= 32 && line[pos] <= 126 && line[pos] != '=');
+
+        tabs[filled][out] = '\0';
+        filled++;
+    }
+
+    return filled == 2 ? 1 : 0;
+}
+
+/* qsort() comparator: orders LIB_INFO records by ascending avg_ins. */
+static int cmp_lib(const void *a, const void *b)
+{
+    const LIB_INFO *lhs = (const LIB_INFO *)a;
+    const LIB_INFO *rhs = (const LIB_INFO *)b;
+
+    if(lhs->avg_ins == rhs->avg_ins)
+        return 0;
+
+    return lhs->avg_ins > rhs->avg_ins ? 1 : -1;
+}
+
+/* Return non-zero when `line` opens a library section, i.e. begins with
+ * "[LIB]".  strncmp() stops at the terminator, so lines shorter than five
+ * characters are handled without reading past the end of the string. */
+static boolean isLibSectionHeader(const char *line)
+{
+    return strncmp(line, "[LIB]", 5) == 0;
+}
+
+/* Zero the seven per-type file counters of one library entry. */
+static void resetLibFileCounts(LIB_INFO *lib)
+{
+    lib->num_s_a_file = 0;
+    lib->num_s_q_file = 0;
+    lib->num_p_file = 0;
+    lib->num_a1_file = 0;
+    lib->num_a2_file = 0;
+    lib->num_q1_file = 0;
+    lib->num_q2_file = 0;
+}
+
+/* Allocate `num` file-name buffers of 1024 bytes each; NULL when num is 0. */
+static char **allocLibFileNames(int num)
+{
+    char **names;
+    int j;
+
+    if(num <= 0)
+        return NULL;
+
+    names = (char **)ckalloc(num * sizeof(char *));
+
+    for(j = 0; j < num; j++)
+        names[j] = (char *)ckalloc(1024 * sizeof(char));
+
+    return names;
+}
+
+/* Parse the library configuration file `libfile` into the globals
+ * num_libs, lib_array and maxReadLen.
+ *
+ * The file holds "key=value" lines grouped into sections that each start
+ * with a "[LIB]" line.  It is read three times:
+ *   1. count the sections; before the first one, read "max_rd_len";
+ *   2. count the files of every kind (f1, q1, f2, q2, f, q, p) per section;
+ *   3. store the file names and the numeric settings of every section.
+ * Finally lib_array is sorted by ascending avg_ins (see cmp_lib()).
+ *
+ * key=value lines that appear before the first "[LIB]" are ignored by
+ * passes 2 and 3, because there is no library entry to attach them to. */
+void scan_libInfo(char *libfile)
+{
+    FILE *fp;
+    char line[1024];
+    int i, index;
+
+    fp = ckopen(libfile, "r");
+    num_libs = 0;
+
+    /* pass 1: count sections, pick up the global max_rd_len */
+    while(fgets(line, 1024, fp))
+    {
+        if(isLibSectionHeader(line))
+        {
+            num_libs++;
+            continue;
+        }
+
+        if(num_libs || !splitColumn(line))
+            continue;
+
+        if(strcmp(tabs[0], "max_rd_len") == 0)
+            maxReadLen = atoi(tabs[1]);
+    }
+
+    lib_array = (LIB_INFO *)ckalloc(num_libs * sizeof(LIB_INFO));
+
+    /* defaults for every library entry */
+    for(i = 0; i < num_libs; i++)
+    {
+        lib_array[i].asm_flag = 3;
+        lib_array[i].rd_len_cutoff = 0;
+        lib_array[i].rank = 0;
+        lib_array[i].pair_num_cut = 1;
+        lib_array[i].map_len = 0;
+        /* not set by the file unless the key is present; avg_ins is the sort key */
+        lib_array[i].min_ins = 0;
+        lib_array[i].max_ins = 0;
+        lib_array[i].avg_ins = 0;
+        lib_array[i].reverse = 0;
+        resetLibFileCounts(&lib_array[i]);
+    }
+
+    /* pass 2: count the files of each kind per library */
+    rewind(fp);
+    i = -1;
+
+    while(fgets(line, 1024, fp))
+    {
+        if(isLibSectionHeader(line))
+        {
+            i++;
+            continue;
+        }
+
+        /* i < 0: no [LIB] seen yet, so there is no entry to update */
+        if(i < 0 || !splitColumn(line))
+            continue;
+
+        if(strcmp(tabs[0], "f1") == 0)
+            lib_array[i].num_a1_file++;
+        else if(strcmp(tabs[0], "q1") == 0)
+            lib_array[i].num_q1_file++;
+        else if(strcmp(tabs[0], "f2") == 0)
+            lib_array[i].num_a2_file++;
+        else if(strcmp(tabs[0], "q2") == 0)
+            lib_array[i].num_q2_file++;
+        else if(strcmp(tabs[0], "f") == 0)
+            lib_array[i].num_s_a_file++;
+        else if(strcmp(tabs[0], "q") == 0)
+            lib_array[i].num_s_q_file++;
+        else if(strcmp(tabs[0], "p") == 0)
+            lib_array[i].num_p_file++;
+    }
+
+    /* allocate the name buffers, then reset the counters so that pass 3 can
+     * reuse them as insertion indices */
+    for(i = 0; i < num_libs; i++)
+    {
+        lib_array[i].s_a_fname = allocLibFileNames(lib_array[i].num_s_a_file);
+        lib_array[i].s_q_fname = allocLibFileNames(lib_array[i].num_s_q_file);
+        lib_array[i].p_fname = allocLibFileNames(lib_array[i].num_p_file);
+        lib_array[i].a1_fname = allocLibFileNames(lib_array[i].num_a1_file);
+        lib_array[i].a2_fname = allocLibFileNames(lib_array[i].num_a2_file);
+        lib_array[i].q1_fname = allocLibFileNames(lib_array[i].num_q1_file);
+        lib_array[i].q2_fname = allocLibFileNames(lib_array[i].num_q2_file);
+
+        lib_array[i].curr_type = 1;
+        lib_array[i].curr_index = 0;
+        lib_array[i].fp1 = NULL;
+        lib_array[i].fp2 = NULL;
+        resetLibFileCounts(&lib_array[i]);
+    }
+
+    /* pass 3: store file names and numeric settings */
+    rewind(fp);
+    i = -1;
+
+    while(fgets(line, 1024, fp))
+    {
+        if(isLibSectionHeader(line))
+        {
+            i++;
+            continue;
+        }
+
+        if(i < 0 || !splitColumn(line))
+            continue;
+
+        /* tabs[1] comes from a 1024-byte line, so it fits the 1024-byte buffers */
+        if(strcmp(tabs[0], "f1") == 0)
+        {
+            index = lib_array[i].num_a1_file++;
+            strcpy(lib_array[i].a1_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "q1") == 0)
+        {
+            index = lib_array[i].num_q1_file++;
+            strcpy(lib_array[i].q1_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "f2") == 0)
+        {
+            index = lib_array[i].num_a2_file++;
+            strcpy(lib_array[i].a2_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "q2") == 0)
+        {
+            index = lib_array[i].num_q2_file++;
+            strcpy(lib_array[i].q2_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "f") == 0)
+        {
+            index = lib_array[i].num_s_a_file++;
+            strcpy(lib_array[i].s_a_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "q") == 0)
+        {
+            index = lib_array[i].num_s_q_file++;
+            strcpy(lib_array[i].s_q_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "p") == 0)
+        {
+            index = lib_array[i].num_p_file++;
+            strcpy(lib_array[i].p_fname[index], tabs[1]);
+        }
+        else if(strcmp(tabs[0], "min_ins") == 0)
+            lib_array[i].min_ins = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "max_ins") == 0)
+            lib_array[i].max_ins = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "avg_ins") == 0)
+            lib_array[i].avg_ins = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "rd_len_cutoff") == 0)
+            lib_array[i].rd_len_cutoff = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "reverse_seq") == 0)
+            lib_array[i].reverse = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "asm_flags") == 0)
+            lib_array[i].asm_flag = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "rank") == 0)
+            lib_array[i].rank = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "pair_num_cutoff") == 0)
+            lib_array[i].pair_num_cut = atoi(tabs[1]);
+        else if(strcmp(tabs[0], "map_len") == 0)
+            lib_array[i].map_len = atoi(tabs[1]);
+    }
+
+    fclose(fp);
+
+    qsort(&lib_array[0], num_libs, sizeof(LIB_INFO), cmp_lib);
+}
+
+/* Among the first `num_libs` entries of lib_array, look at those whose
+ * asm_flag is 4 and return their largest rd_len_cutoff.  When such entries
+ * exist but none has a positive cutoff, return the global maxReadLen; when
+ * no entry has asm_flag 4, return 0.
+ * Note that the parameter shadows the global num_libs. */
+int getMaxLongReadLen(int num_libs)
+{
+    int longest = 0;
+    boolean seen = 0;
+    int idx;
+
+    for(idx = 0; idx < num_libs; idx++)
+    {
+        if(lib_array[idx].asm_flag == 4)
+        {
+            seen = 1;
+
+            if(longest < lib_array[idx].rd_len_cutoff)
+                longest = lib_array[idx].rd_len_cutoff;
+        }
+    }
+
+    if(seen && longest <= 0)
+        return maxReadLen;
+
+    return longest;
+}
+
+/* Release one array of `count` file-name buffers together with the array. */
+static void freeLibFileNames(char **names, int count)
+{
+    int j;
+
+    for(j = 0; j < count; j++)
+        free((void *)names[j]);
+
+    free((void *)names);
+}
+
+/* Free every file-name list held by lib_array, then lib_array itself.
+ *
+ * A name list is only touched when its counter is non-zero, mirroring how
+ * scan_libInfo() allocates it.  lib_array is reset to NULL afterwards so the
+ * guard at the top makes a second call a no-op instead of a double free. */
+void free_libs()
+{
+    int i;
+
+    if(!lib_array)
+        return;
+
+    for(i = 0; i < num_libs; i++)
+    {
+        if(lib_array[i].num_s_a_file)
+            freeLibFileNames(lib_array[i].s_a_fname, lib_array[i].num_s_a_file);
+
+        if(lib_array[i].num_s_q_file)
+            freeLibFileNames(lib_array[i].s_q_fname, lib_array[i].num_s_q_file);
+
+        if(lib_array[i].num_p_file)
+            freeLibFileNames(lib_array[i].p_fname, lib_array[i].num_p_file);
+
+        if(lib_array[i].num_a1_file)
+            freeLibFileNames(lib_array[i].a1_fname, lib_array[i].num_a1_file);
+
+        if(lib_array[i].num_a2_file)
+            freeLibFileNames(lib_array[i].a2_fname, lib_array[i].num_a2_file);
+
+        if(lib_array[i].num_q1_file)
+            freeLibFileNames(lib_array[i].q1_fname, lib_array[i].num_q1_file);
+
+        if(lib_array[i].num_q2_file)
+            freeLibFileNames(lib_array[i].q2_fname, lib_array[i].num_q2_file);
+    }
+
+    num_libs = 0;
+    free((void *)lib_array);
+    lib_array = NULL;
+}
+
+/* Allocate the global `pes` array with `gradsCounter` PE_INFO entries.
+ * Nothing happens when gradsCounter is zero.  The parameter shadows the
+ * global of the same name. */
+void alloc_pe_mem(int gradsCounter)
+{
+    if(!gradsCounter)
+        return;
+
+    pes = (PE_INFO *)ckalloc(gradsCounter * sizeof(PE_INFO));
+}
+
+/* Release the global `pes` array, if any, and reset the pointer to NULL. */
+void free_pe_mem()
+{
+    if(pes == NULL)
+        return;
+
+    free((void *)pes);
+    pes = NULL;
+}
+
diff --git a/fusion/loadGraph.c b/fusion/loadGraph.c
new file mode 100644
index 0000000..931c5ee
--- /dev/null
+++ b/fusion/loadGraph.c
@@ -0,0 +1,494 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+#define preARCBLOCKSIZE 100000
+
+static void loadArcs(char *graphfile);
+static void loadContig(char *graphfile);
+
+void loadUpdatedVertex(char *graphfile)
+{   /* Read "<graphfile>.updated.vertex" into the globals vt_array, num_vt and overlaplen. */
+    char name[256], line[256];
+    FILE *fp;
+    Kmer word;
+    int num_kmer = -1, i;   /* -1 until a well-formed header line has been parsed */
+    char ch;
+
+    snprintf(name, sizeof(name), "%s.updated.vertex", graphfile);   /* bounded, unlike sprintf */
+    fp = ckopen(name, "r");
+
+    while(fgets(line, sizeof(line), fp) != NULL)
+    {
+        if(line[0] != 'V')
+            continue;
+        /* header line: the three fields start at offset 6 */
+        if(sscanf(line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen) != 3)
+            num_kmer = -1;
+        break;
+    }
+
+    if(num_kmer < 0)
+    {   /* previously num_kmer was used uninitialised when the header was missing */
+        fprintf(stderr, "%s: missing or malformed vertex header\n", name);
+        fclose(fp);
+        num_vt = 0;
+        return;
+    }
+    printf("there're %d kmers in vertex file\n", num_kmer);
+    vt_array = (VERTEX *)ckalloc(2 * (size_t)num_kmer * sizeof(VERTEX));
+    for(i = 0; i < num_kmer && fscanf(fp, "%llx ", &word) == 1; i++)
+        vt_array[i].kmer = word;
+    fclose(fp);
+    if(i < num_kmer)   /* truncated file: keep only the kmers actually read */
+        fprintf(stderr, "%s: expected %d kmers, read %d\n", name, num_kmer, i);
+    num_kmer = i;
+    for(i = 0; i < num_kmer; i++)   /* upper half holds the reverse complements */
+        vt_array[i + num_kmer].kmer = reverseComplement(vt_array[i].kmer, overlaplen);
+    num_vt = num_kmer;
+}
+int cmp_int(const void *a, const void *b)
+{
+    /*
+     * qsort()-style comparator: interprets both arguments as pointers to
+     * int and returns 1, 0 or -1 for greater, equal or less.
+     */
+    const int lhs = *(const int *)a;
+    const int rhs = *(const int *)b;
+
+    if(lhs == rhs)
+        return 0;
+    return lhs > rhs ? 1 : -1;
+}
+int uniqueLenSearch(unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target)
+{
+    /*
+     * Binary search of the ascending, 1-based range len_array[1..num].
+     * On a hit at position p the current flag_array[p] is returned and
+     * flag_array[p] is then incremented; -1 is returned when target is absent.
+     */
+    int lo = 1, hi = num;
+    while(lo <= hi)
+    {
+        int probe = (lo + hi) / 2;
+
+        if(target > len_array[probe])
+            lo = probe + 1;
+        else if(target < len_array[probe])
+            hi = probe - 1;
+        else
+        {
+            unsigned int slot = flag_array[probe];
+            flag_array[probe] = slot + 1;
+            return slot;
+        }
+    }
+    return -1;
+}
+
+int lengthSearch(unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target)
+{
+    /*
+     * Find an entry equal to target in the ascending, 1-based range
+     * len_array[1..num] whose flag_array slot is still zero, set that flag
+     * to 1 and return the entry's index.  Returns -1 when target is absent
+     * or every entry of that length is already flagged.
+     * The original forward scan accepted the first unflagged entry of any
+     * length and, if none was left, wrote flag_array[num + 1]; the scan is
+     * now limited to in-range entries whose length equals target.
+     */
+    int mid = 0, low = 1, high = num, i;
+    while(low <= high)
+    {
+        mid = low + (high - low) / 2;
+        if(len_array[mid] == target)
+            break;
+        else if(target > len_array[mid])
+            low = mid + 1;
+        else
+            high = mid - 1;
+    }
+
+    if(low > high)
+        return -1;
+
+    if(!flag_array[mid])
+    {
+        /* walk left to the first entry of the unflagged run ending at mid */
+        for(i = mid - 1; i > 0; i--)
+            if(len_array[i] != target || flag_array[i])
+                break;
+        i++;
+    }
+    else
+    {
+        /* mid is taken: look right, but only among entries of the same length */
+        for(i = mid + 1; i <= num && len_array[i] == target && flag_array[i]; i++)
+            ;
+        if(i > num || len_array[i] != target)
+            return -1;
+    }
+
+    flag_array[i] = 1;
+    return i;
+}
+
+void quick_sort_int(unsigned int *length_array, int low, int high)
+{
+    /*
+     * In-place ascending quicksort of length_array[low..high] (inclusive),
+     * using the first element as pivot.  The pivot was declared as Kmer
+     * although the elements are unsigned int; it now has the element type.
+     */
+    int i, j;
+    unsigned int pivot;
+
+    if(low >= high)
+        return;
+
+    pivot = length_array[low];
+    i = low;
+    j = high;
+
+    while(i < j)
+    {
+        while(i < j && length_array[j] >= pivot)
+            j--;
+        if(i < j)
+            length_array[i++] = length_array[j];
+        while(i < j && length_array[i] <= pivot)
+            i++;
+        if(i < j)
+            length_array[j--] = length_array[i];
+    }
+    length_array[i] = pivot;   /* pivot lands in its final position */
+    quick_sort_int(length_array, low, i - 1);
+    quick_sort_int(length_array, i + 1, high);
+}
+
+void loadUpdatedEdges(char *graphfile)
+{ // Load "<graphfile>.updated.edge" into the global contig_array (contigs renumbered by ascending length) and write "<graphfile>.newContigIndex".
+ char name[256], line[1024];
+ int bal_ed = 0, cvg = 0;
+ FILE *fp, *out_fp;
+ // counters start at 0 so that a missing or malformed header/record cannot leave them indeterminate
+ unsigned int num_ctgge = 0, length = 0, index = 0;
+ unsigned int i = 0, j;
+ unsigned int newIndex;
+ unsigned int *length_array, *flag_array, diff_len;
+ char *outfile = graphfile;
+ long long cvgSum = 0;
+ long long counter = 0;
+ // Afterwards contigs shorter than ctg_short are masked together with their twins and loadContig() attaches the sequences.
+ //get overlaplen from *.preGraphBasic
+ /*sprintf(name, "%s.preGraphBasic", graphfile);
+ fp = ckopen(name, "r");
+
+ while(fgets(line,sizeof(line),fp)!=NULL){
+ if(line[0] == 'V'){
+ sscanf(line+6, "%d %c %d",&num_kmer,&c,&overlaplen);
+ //printf("K = %d\n",overlaplen);
+ break;
+ }
+ }*/
+ if(ctg_short == 0)
+ ctg_short = overlaplen + 2;
+
+ // snprintf keeps the file names within name[256]
+
+ snprintf(name, sizeof(name), "%s.updated.edge", graphfile);
+ fp = ckopen(name, "r");
+ snprintf(name, sizeof(name), "%s.newContigIndex", outfile);
+ out_fp = ckopen(name, "w");
+
+ while(fgets(line, sizeof(line), fp) != NULL)
+ {
+ if(line[0] == 'E')
+ {
+ sscanf(line + 5, "%u", &num_ctgge);
+ // %u matches the unsigned destination (the original used %d)
+ //printf("total %d contigs\n",num_ctgge);
+ break;
+ }
+ }
+
+ index_array = (unsigned int *)ckalloc((num_ctgge + 1) * sizeof(unsigned int));
+ length_array = (unsigned int *)ckalloc((num_ctgge + 1) * sizeof(unsigned int));
+ flag_array = (unsigned int *)ckalloc((num_ctgge + 1) * sizeof(unsigned int));
+ // first pass: collect the contig lengths (1-based); never store more records than the arrays were sized for
+ while(index < num_ctgge && fgets(line, sizeof(line), fp) != NULL)
+ {
+ if(line[0] == '>')
+ {
+ sscanf(line + 7, "%u", &length);
+ index_array[++index] = length;
+ length_array[++i] = length;
+ }
+ }
+
+ num_ctg = index;
+ orig2new = 1;
+ //quick_sort_int(length_array,1,num_ctg);
+ qsort(&(length_array[1]), num_ctg, sizeof(length_array[0]), cmp_int);
+ // collapse the sorted lengths to unique values; flag_array[k] is the first new index to hand out for the k-th unique length
+ diff_len = 0;
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ for(j = i + 1; j <= num_ctg; j++)
+ if(length_array[j] != length_array[i])
+ break;
+
+ length_array[++diff_len] = length_array[i];
+ flag_array[diff_len] = i;
+ i = j - 1;
+ }
+
+ /*
+ for(i=1;i<=num_ctg;i++)
+ flag_array[i] = 0;
+ */
+ contig_array = (CONTIG *)ckalloc((num_ctg + 1) * sizeof(CONTIG));
+
+ //load edges (second pass, bounded by the number of records counted in the first pass)
+ index = 0;
+ rewind(fp);
+
+ while(index < num_ctg && fgets(line, sizeof(line), fp) != NULL)
+ {
+ if(line[0] == '>')
+ {
+ // if(overlaplen<=31)
+ // sscanf(line,">length %u,%llx,%llx,%d,%d",&length,&from_kmer,&to_kmer,&bal_ed,&cvg);
+ // else
+ sscanf(line, ">length %u,%d,%d", &length, &bal_ed, &cvg);
+ newIndex = uniqueLenSearch(length_array, flag_array, diff_len, length); // NOTE(review): a -1 result (length not found) is not handled
+ index_array[++index] = newIndex;
+
+ contig_array[newIndex].length = length;
+ contig_array[newIndex].bal_edge = bal_ed + 1;
+ contig_array[newIndex].downwardConnect = NULL;
+ contig_array[newIndex].mask = 0;
+ contig_array[newIndex].flag = 0;
+ contig_array[newIndex].arcs = NULL;
+ contig_array[newIndex].seq = NULL;
+ contig_array[newIndex].multi = 0;
+ contig_array[newIndex].inSubGraph = 0;
+ contig_array[newIndex].cvg = cvg / 10;
+
+ if(cvg)
+ {
+ counter += length;
+ cvgSum += (long long)cvg * length; // widen first: int * unsigned int was evaluated in 32 bits and could wrap
+ }
+
+ fprintf(out_fp, "%u %u %d\n", index, newIndex, contig_array[newIndex].bal_edge);
+ }
+ }
+
+ if(counter)
+ //cvgAvg = cvgSum/counter > 2 ? cvgSum/counter : 3;
+ cvgAvg = cvgSum / counter / 10 > 2 ? cvgSum / counter / 10 : 3;
+
+ //mark repeats
+ int bal_i;
+ /*if(maskRep){
+ counter = 0;
+ for(i=1;i<=num_ctg;i++){
+ bal_i = getTwinCtg(i);
+ if((contig_array[i].cvg+contig_array[bal_i].cvg)>4*cvgAvg){
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+ counter += 2;
+ }
+ if(isSmallerThanTwin(i))
+ i++;
+ }
+ printf("average contig coverage : %d. Number of contig(s) masked because of high coverage: %llx\n",
+ cvgAvg,counter);
+ }*/
+
+ counter = 0;
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].mask)
+ continue;
+
+ bal_i = getTwinCtg(i);
+
+ if(contig_array[i].length < ctg_short)
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+ counter += 2;
+ }
+
+ if(isSmallerThanTwin(i))
+ i++;
+ }
+
+ //printf("Mask contigs shorter than %d, %lld contig masked\n",
+ // ctg_short,counter);
+ printf("[%s]Number of contig(s) masked because of shortie: %lld\n", __FUNCTION__, counter);
+ //loadArcs(graphfile);
+ //tipsCount();
+ loadContig(graphfile);
+ //printf("done loading updated edges\n");
+ fflush(stdout);
+ free((void *)length_array);
+ free((void *)flag_array);
+ fclose(fp);
+ fclose(out_fp);
+}
+
+/*static void add1Arc(unsigned int from_ed,unsigned int to_ed, unsigned int weight)
+{
+ preARC *parc;
+ unsigned int from_c = index_array[from_ed];
+ unsigned int to_c = index_array[to_ed];
+
+ parc = allocatePreArc(to_c);
+ parc->multiplicity = weight;
+ parc->next = contig_array[from_c].arcs;
+ contig_array[from_c].arcs = parc;
+}*/
+
+/*void loadArcs(char *graphfile)
+{
+ FILE *fp;
+ char name[256],line[1024];
+ unsigned int target,weight;
+ unsigned int from_ed;
+ char *seg;
+
+ sprintf(name,"%s.Arc",graphfile);
+ fp = ckopen(name,"r");
+
+ createPreArcMemManager();
+ arcCounter = 0;
+ while(fgets(line,sizeof(line),fp)!=NULL){
+ seg = strtok(line," ");
+ from_ed = atoi(seg);
+ //printf("%d\n",from_ed);
+ while((seg=strtok(NULL," "))!=NULL){
+ target = atoi(seg);
+ seg = strtok(NULL," ");
+ weight = atoi(seg);
+ add1Arc(from_ed,target,weight);
+
+ }
+ }
+ printf("%lld arcs loaded\n",arcCounter);
+ fclose(fp);
+}*/
+
+void loadContig(char *graphfile)
+{
+ // Read "<graphfile>.contig" and attach each record's sequence, packed with writeChar2tightString(), to contig_array[index_array[edgeno]].seq.
+ char c, name[256], line[1024], *tightSeq = NULL;
+ FILE *fp;
+ int n = 0, length = 0, index = -1, edgeno = 0;
+ unsigned int i;
+ unsigned int newIndex;
+
+ snprintf(name, sizeof(name), "%s.contig", graphfile);
+ fp = ckopen(name, "r");
+
+ while(fgets(line, sizeof(line), fp) != NULL)
+ {
+ if(line[0] == '>')
+ {
+ if(tightSeq)
+ {
+ newIndex = index_array[edgeno];
+ contig_array[newIndex].seq = tightSeq;
+ }
+ tightSeq = NULL; // stays NULL (following sequence lines are skipped) unless the header below is valid
+ n = 0;
+ index++;
+ // %255s bounds the word copied into name[256]; edgeno must be one of the 1..num_ctg slots filled in index_array
+ if(sscanf(line + 1, "%d %255s %d", &edgeno, name, &length) == 3 && length >= 0 && edgeno >= 1 && (unsigned int)edgeno <= num_ctg)
+ tightSeq = (char *)ckalloc((length / 4 + 1) * sizeof(char));
+ else fprintf(stderr, "[%s]skipping malformed contig header: %s", __FUNCTION__, line);
+ }
+ else
+ {
+ int tmp_len = strlen(line);
+ // never write past the buffer allocated above: (length / 4 + 1) bytes, assuming four bases per byte as the allocation size suggests
+ for(i = 0; i < tmp_len && tightSeq && n < (length / 4 + 1) * 4; i++)
+ {
+ if(line[i] >= 'a' && line[i] <= 'z')
+ {
+ c = base2int(line[i] - 'a' + 'A');
+ writeChar2tightString(c, tightSeq, n++);
+ }
+ else if(line[i] >= 'A' && line[i] <= 'Z')
+ {
+ c = base2int(line[i]);
+ writeChar2tightString(c, tightSeq, n++);
+ }
+ }
+ }
+
+ }
+
+ if(tightSeq)
+ {
+ newIndex = index_array[edgeno];
+ contig_array[newIndex].seq = tightSeq;
+ }
+
+ printf("[%s]input %d contigs\n", __FUNCTION__, index + 1);
+ fclose(fp);
+
+ //printf("the %dth contig with index 107\n",index);
+}
+void freeContig_array()
+{
+    /* Free each contig's seq buffer and closeReads stack (entries 1..num_ctg),
+     * then the global contig_array itself; does nothing if it is already NULL. */
+    unsigned int idx;
+
+    if(contig_array == NULL)
+        return;
+    for(idx = 1; idx <= num_ctg; idx++)
+    {
+        CONTIG *ctg = &contig_array[idx];
+        if(ctg->seq != NULL)
+            free((void *)ctg->seq);
+        if(ctg->closeReads != NULL)
+            freeStack(ctg->closeReads);
+    }
+    free((void *)contig_array);
+    contig_array = NULL;
+}
+/*
+void loadCvg(char *graphfile)
+{
+ char name[256],line[1024];
+ FILE *fp;
+ int cvg;
+ unsigned int newIndex,edgeno,bal_ctg;
+
+ sprintf(name,"%s.contigCVG",graphfile);
+ fp = fopen(name,"r");
+ if(!fp){
+ printf("contig coverage file %s is not found!\n",name);
+ return;
+ }
+
+ while(fgets(line,sizeof(line),fp)!=NULL){
+ if(line[0]=='>'){
+ sscanf(line+1,"%d %d",&edgeno,&cvg);
+ newIndex = index_array[edgeno];
+ cvg = cvg <= 255 ? cvg:255;
+ contig_array[newIndex].multi = cvg;
+ bal_ctg = getTwinCtg(newIndex);
+ contig_array[bal_ctg].multi= cvg;
+ }
+ }
+ fclose(fp);
+}
+*/
diff --git a/fusion/localAsm.c b/fusion/localAsm.c
new file mode 100644
index 0000000..5dd5929
--- /dev/null
+++ b/fusion/localAsm.c
@@ -0,0 +1,1976 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+#define CTGendLen 35 // shouldn't larger than max_read_len
+#define UPlimit 5000
+#define MaxRouteNum 10
+
+static Kmer pubKmer = 0x1b4d65165b;
+
+static void kmerSet_mark(KmerSet *set);
+static void trace4Repeat(Kmer currW, int steps, int min, int max, int *num_route,
+ KmerSet *kset, Kmer kmerDest, int overlap, Kmer WORDF,
+ int *traceCounter, int maxRoute, kmer_t **soFarNode, short *multiOccu1, short *multiOccu2,
+ int *routeLens, char **foundRoutes, char *soFarSeq,
+ long long *soFarLinks, double *avgLinks);
+
+static Kmer prevKmerLocal(Kmer next, char ch, int overlap)
+{
+    /* Shift next right by one base (2 bits) and add ch as the new leading
+     * base, i.e. shifted left by 2 * (overlap - 1) bits. */
+    Kmer lead = ((Kmer)ch) << (2 * (overlap - 1));
+    return (next >> 2) + lead;
+}
+static Kmer nextKmerLocal(Kmer prev, char ch, Kmer WordFilter)
+{
+    /* Shift prev left by one base (2 bits), mask the result with WordFilter
+     * and add ch as the new last base. */
+    Kmer shifted = (prev << 2) & WordFilter;
+    shifted += ch;
+    return shifted;
+}
+static void singleKmer(int t, KmerSet *kset, int flag, Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer)
+{
+    /*
+     * Insert entry t of the buffers into kset via put_kmerset() and merge
+     * flag into the node's inEdge field: an unset field (0) takes flag,
+     * and the combinations 1+2 / 2+1 become 3.  Anything else is left as is.
+     */
+    kmer_t *node;
+
+    put_kmerset(kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &node);
+
+    if(node->inEdge == 0)
+        node->inEdge = flag;
+    else if((node->inEdge == 1 && flag == 2) || (node->inEdge == 2 && flag == 1))
+        node->inEdge = 3;
+}
+
+static void putKmer2DBgraph(KmerSet *kset, int flag, int kmer_c, Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer)
+{
+    /* Feed the first kmer_c buffered kmers to singleKmer(), in order, all tagged with flag. */
+    int next = 0;
+
+    while(next < kmer_c)
+        singleKmer(next++, kset, flag, kmerBuffer, prevcBuffer, nextcBuffer);
+}
+
+static void getSeqFromRead(READNEARBY read, char *src_seq)
+{
+    /* Unpack read.len entries into src_seq from the tight string that
+     * darrayGet() returns for readSeqInGap at read.seqStarter. */
+    char *packed = (char *)darrayGet(readSeqInGap, read.seqStarter);
+    int len = read.len, pos;
+    for(pos = 0; pos < len; pos++)
+        src_seq[pos] = getCharInTightString(packed, pos);
+}
+
+static void chopKmer4Ctg(Kmer *kmerCtg, int lenCtg, int overlap, char *src_seq, Kmer WORDF)
+{
+    /*
+     * Write every overlap-long kmer of src_seq[0..lenCtg-1] to kmerCtg, in
+     * sequence order.  The first kmer is packed two bits per base from the
+     * leading overlap characters; each later one is derived with
+     * nextKmerLocal().  kmerCtg[0] is always written, so callers need
+     * lenCtg >= overlap for meaningful output.
+     */
+    Kmer word = 0;
+    int pos, count = 0;
+
+    for(pos = 0; pos < overlap; pos++)
+        word = (word << 2) + src_seq[pos];
+
+    kmerCtg[count++] = word;
+
+    for(pos = overlap; pos < lenCtg; pos++)
+        kmerCtg[count++] = word = nextKmerLocal(word, src_seq[pos], WORDF);
+}
+
+static void chopKmer4read(int len_seq, int overlap, char *src_seq, char *bal_seq,
+ Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer, int *kmer_c, Kmer WORDF)
+{ // For every kmer of src_seq store the smaller of the kmer and its reverse complement, plus the base before and after it; *kmer_c receives the number stored.
+ int j, bal_j;
+ Kmer word, bal_word;
+ int index;
+ char InvalidCh = 4;
+ // a sequence shorter than overlap + 1 yields no kmers
+ if(len_seq < overlap + 1)
+ {
+ *kmer_c = 0;
+ return;
+ }
+
+ word = 0;
+ // pack the first overlap bases, two bits each
+ for (index = 0; index < overlap; index++)
+ {
+ word <<= 2;
+ word += src_seq[index];
+ }
+
+ reverseComplementSeq(src_seq, len_seq, bal_seq);
+
+ // complementary node
+ bal_word = reverseComplement(word, overlap);
+ bal_j = len_seq - 0 - overlap; // 0;
+ index = 0;
+ // first kmer: it has no base before it in src_seq, so the missing neighbour is recorded as InvalidCh
+ if(word < bal_word)
+ {
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = InvalidCh;
+ nextcBuffer[index++] = src_seq[0 + overlap];
+ }
+ else
+ {
+ kmerBuffer[index] = bal_word;
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ nextcBuffer[index++] = InvalidCh;
+ }
+ // remaining kmers: word advances along src_seq while bal_word tracks the matching position bal_j in bal_seq
+ for(j = 1; j <= len_seq - overlap; j ++)
+ {
+ word = nextKmerLocal(word, src_seq[j - 1 + overlap], WORDF);
+ bal_j = len_seq - j - overlap; // j;
+ bal_word = prevKmerLocal(bal_word, bal_seq[bal_j], overlap);
+
+ if(word < bal_word)
+ {
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = src_seq[j - 1];
+
+ if(j < len_seq - overlap)
+ nextcBuffer[index++] = src_seq[j + overlap];
+ else
+ nextcBuffer[index++] = InvalidCh;
+
+ //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
+ }
+ else
+ {
+ // complementary node
+ kmerBuffer[index] = bal_word;
+
+ if(bal_j > 0)
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ else
+ prevcBuffer[index] = InvalidCh;
+
+ nextcBuffer[index++] = bal_seq[bal_j + overlap];
+ //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
+ }
+ }
+
+ *kmer_c = index;
+}
+
+static void headTightStr(char *tightStr, int length, int start, int headLen, int revS, char *src_seq)
+{
+    /*
+     * Copy headLen bases of tightStr into src_seq.  Forward (revS == 0):
+     * positions start, start+1, ...  Reverse (revS != 0): positions
+     * length-1-start, length-2-start, ..., each passed through int_comp().
+     */
+    int k;
+
+    for(k = 0; k < headLen; k++)
+        if(!revS)
+            src_seq[k] = getCharInTightString(tightStr, start + k);
+        else
+            src_seq[k] = int_comp(getCharInTightString(tightStr, length - 1 - start - k));
+}
+
+static int getSeqFromCtg(CTGinSCAF *ctg, boolean fromHead, unsigned int len, int originOverlap, char *src_seq)
+{
+    /*
+     * Copy up to len bases from one end of contig ctg->ctgID into src_seq and
+     * return how many were copied (0 if the contig length is below 1).
+     * The sequence is treated as contig length + originOverlap bases long;
+     * fromHead selects the first len bases, otherwise the last len.
+     * When the contig has no seq of its own, its twin's seq is read
+     * backwards through int_comp() instead (headTightStr with revS = 1).
+     */
+    unsigned int ctgId = ctg->ctgID;
+    unsigned int bal_ctg = getTwinCtg(ctgId);
+    unsigned int length, start;
+    int useTwin;
+
+    if(contig_array[ctgId].length < 1)
+        return 0;
+
+    length = contig_array[ctgId].length + originOverlap;
+
+    if(len > length)
+        len = length;
+
+    start = fromHead ? 0 : length - len;
+    useTwin = contig_array[ctgId].seq ? 0 : 1;
+    headTightStr(contig_array[useTwin ? bal_ctg : ctgId].seq, length, start, len, useTwin, src_seq);
+
+    return len;
+}
+
+
+static KmerSet *readsInGap2DBgraph(READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2, int originOverlap,
+ Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap, Kmer WordFilter)
+{ // Build a kmer set from the num reads in rdArray (flag 0) and from the ends of ctg1 (flag 1) and ctg2 (flag 2); also fills kmerCtg1/kmerCtg2 and returns the new set.
+ int kmer_c;
+ Kmer *kmerBuffer;
+ char *nextcBuffer, *prevcBuffer;
+ int i;
+ int buffer_size = maxReadLen > CTGendLen ? maxReadLen : CTGendLen;
+ KmerSet *kmerS = NULL;
+ int lenCtg1;
+ int lenCtg2;
+ char *bal_seq;
+ char *src_seq;
+ // scratch buffers, sized for the longer of a read and a contig end; all are freed before returning
+ src_seq = (char *)ckalloc(buffer_size * sizeof(char));
+ bal_seq = (char *)ckalloc(buffer_size * sizeof(char));
+
+ kmerBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+ prevcBuffer = (char *)ckalloc(buffer_size * sizeof(char));
+ nextcBuffer = (char *)ckalloc(buffer_size * sizeof(char));
+
+ kmerS = init_kmerset(1024, 0.77f);
+ // kmers of the reads are inserted with flag 0
+ for(i = 0; i < num; i++)
+ {
+ getSeqFromRead(rdArray[i], src_seq);
+ chopKmer4read(rdArray[i].len, overlap, src_seq, bal_seq,
+ kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter);
+ putKmer2DBgraph(kmerS, 0, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer);
+ }
+ // up to CTGendLen bases from the tail of ctg1 (fromHead = 0): kmers kept in kmerCtg1 and inserted with flag 1
+ lenCtg1 = getSeqFromCtg(ctg1, 0, CTGendLen, originOverlap, src_seq);
+ chopKmer4Ctg(kmerCtg1, lenCtg1, overlap, src_seq, WordFilter);
+ chopKmer4read(lenCtg1, overlap, src_seq, bal_seq,
+ kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter);
+ putKmer2DBgraph(kmerS, 1, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer);
+ // up to CTGendLen bases from the head of ctg2 (fromHead = 1): kmers kept in kmerCtg2 and inserted with flag 2
+ lenCtg2 = getSeqFromCtg(ctg2, 1, CTGendLen, originOverlap, src_seq);
+ chopKmer4Ctg(kmerCtg2, lenCtg2, overlap, src_seq, WordFilter);
+ chopKmer4read(lenCtg2, overlap, src_seq, bal_seq,
+ kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter);
+ putKmer2DBgraph(kmerS, 2, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer);
+ /*
+ if(ctg1->ctgID==3733&&ctg2->ctgID==3067){
+ for(i=0;i<lenCtg2;i++)
+ printf("%c",int2base((int)src_seq[i]));
+ printf("\n");
+ }
+ */
+ //printf("sequence length chop from contigs on both sides: %d %d\n",lenCtg1,lenCtg2);
+ //kmerSet_deLoop(kmerS,WordFilter);
+ kmerSet_mark(kmerS);
+ free((void *)src_seq);
+ free((void *)bal_seq);
+ free((void *)kmerBuffer);
+ free((void *)nextcBuffer);
+ free((void *)prevcBuffer);
+
+ fflush(stdout);
+
+ return kmerS;
+}
+
+static void printKmer(FILE *fo, Kmer kmer, int overlap)
+{
+    /*
+     * Print kmer to fo as overlap characters, most significant base first:
+     * every 2-bit group is translated with int2base().
+     */
+    int pos;
+
+    for(pos = overlap - 1; pos >= 0; pos--)
+    {
+        int code = (int)((kmer >> (2 * pos)) & 3);
+
+        fprintf(fo, "%c", int2base(code));
+    }
+}
+
+static void kmerSet_mark(KmerSet *set)
+{
+    /*
+     * Flag the "linear" nodes of a kmer set.
+     *
+     * Every slot of set->array that is_kmer_entity_null() does not report as
+     * empty is visited, with set->iter_ptr serving as the cursor (it equals
+     * set->size on return).  For each node the four left and the four right
+     * coverage counters are inspected; when exactly one counter on each side
+     * is positive the node's linear field is set to 1.  Nodes that do not
+     * meet the condition are left untouched.
+     *
+     * Tallies of nodes with the single field set and of linear nodes would
+     * only feed a disabled debug printf, so none are kept; they have no
+     * effect on the set.
+     */
+    kmer_t *node;
+    int base;
+    int inDegree, outDegree;
+
+    for(set->iter_ptr = 0; set->iter_ptr < set->size; set->iter_ptr++)
+    {
+        // skip unused slots
+        if(is_kmer_entity_null(set->flags, set->iter_ptr))
+            continue;
+
+        node = set->array + set->iter_ptr;
+        inDegree = 0;
+        outDegree = 0;
+
+        // count the bases with positive coverage on each side
+        for(base = 0; base < 4; base++)
+        {
+            if(get_kmer_left_cov(*node, base) > 0)
+            {
+                inDegree++;
+            }
+
+            if(get_kmer_right_cov(*node, base) > 0)
+            {
+                outDegree++;
+            }
+        }
+
+        // exactly one positive counter on the left and one on the right
+        if(inDegree == 1 && outDegree == 1)
+        {
+            node->linear = 1;
+        }
+    }
+}
+
+static kmer_t *searchNode(Kmer word, KmerSet *kset, int overlap)
+{
+    /*
+     * Look up word in kset under the smaller of word and its reverse
+     * complement.  Returns the node, or NULL when search_kmerset()
+     * reports no match.
+     */
+    Kmer twin = reverseComplement(word, overlap);
+    Kmer key = word < twin ? word : twin;
+    kmer_t *hit;
+    boolean found;
+
+    found = search_kmerset(kset, key, &hit);
+
+    return found ? hit : NULL;
+}
+
+static int searchKmerOnCtg(Kmer currW, Kmer *kmerDest, int num)
+{
+    /*
+     * Linear search: index of the first of the num entries of kmerDest
+     * that equals currW, or -1 if there is none.
+     */
+    int pos = 0;
+
+    while(pos < num && kmerDest[pos] != currW)
+        pos++;
+
+    return pos < num ? pos : -1;
+
+}
+
+// pick one of the n items (the random draw is commented out, so the last item is always taken)
+static int nPick1(int *array, int n)
+{
+    /* Remove one of the first n items of array and return it; with the fixed
+     * pick below (last item) the shift-down loop has nothing to move. */
+    int pick = n - 1;   //(int)(drand48()*n);
+    int chosen = array[pick];
+    int k;
+    for(k = pick + 1; k < n; k++)
+        array[k - 1] = array[k];
+    return chosen;
+}
+
+static void traceAlongDBgraph(Kmer currW, int steps, int min, int max, int *num_route,
+ KmerSet *kset, Kmer *kmerDest, int num, int overlap, Kmer WORDF,
+ char **foundRoutes, int *routeEndOnCtg2, int *routeLens, char *soFarSeq,
+ int *traceCounter, int maxRoute, kmer_t **soFarNode, boolean *multiOccu,
+ long long *soFarLinks, double *avgLinks)
+{ // Recursive walk through kset starting at currW; records up to maxRoute routes of min..max steps that end on one of the num kmers in kmerDest.
+ (*traceCounter)++;
+ // give up once the total number of calls exceeds UPlimit
+ if(*traceCounter > UPlimit)
+ {
+ /*
+ if(overlap==19&&kmerDest[0]==pubKmer)
+ printf("UPlimit\n");
+ */
+ return;
+ }
+
+ if(steps > max || *num_route >= maxRoute)
+ {
+ /*
+ if(overlap==19&&kmerDest[0]==pubKmer)
+ printf("max steps/maxRoute\n");
+ */
+ return;
+ }
+ // the set is searched under the smaller of currW and its reverse complement
+ Kmer word = reverseComplement(currW, overlap);
+ boolean isSmaller = currW < word;
+ int i;
+ char ch;
+ unsigned char links;
+
+ if(isSmaller)
+ word = currW;
+
+ kmer_t *node;
+ boolean found = search_kmerset(kset, word, &node);
+
+ if(!found)
+ {
+ printf("Trace: can't find kmer %llx (rc %llx, input %llx) at step %d\n", word,
+ reverseComplement(word, overlap), currW, steps);
+ return;
+ }
+ // nodes with twin > 1 are not entered (searchFgap sets twin = 2 on start nodes that produced no route)
+ if(node->twin > 1)
+ return;
+
+ if(soFarNode)
+ soFarNode[steps] = node;
+ // the low two bits of currW are the base appended by the latest step
+ if(steps > 0)
+ soFarSeq[steps - 1] = currW & 0x03;
+
+ int index, end;
+ int linkCounter = *soFarLinks;
+ // a route is complete when, after at least min steps, the node has inEdge > 1 and currW is found in kmerDest
+ if(steps >= min && node->inEdge > 1 && (end = searchKmerOnCtg(currW, kmerDest, num)) >= 0)
+ {
+ index = *num_route;
+
+ if(steps > 0)
+ avgLinks[index] = (double)linkCounter / steps;
+ else
+ avgLinks[index] = 0;
+
+ //find node that appears more than once in the path
+ multiOccu[index] = 0;
+
+ for(i = 0; i < steps + 1; i++)
+ soFarNode[i]->deleted = 0;
+
+ for(i = 0; i < steps + 1; i++)
+ {
+ if(soFarNode[i]->deleted)
+ {
+ multiOccu[index] = 1;
+ break;
+ }
+
+ soFarNode[i]->deleted = 1;
+ }
+
+ routeEndOnCtg2[index] = end;
+ routeLens[index] = steps;
+ char *array = foundRoutes[index];
+
+ for(i = 0; i < steps; i++)
+ array[i] = soFarSeq[i];
+
+ if(i < max)
+ array[i] = 4; //indicate the end of the sequence
+
+ *num_route = ++index;
+ return;
+ }
+ // no route ended here: extend by every base whose coverage on this node is non-zero
+ steps++;
+
+ if(isSmaller)
+ {
+ int array[] = {0, 1, 2, 3};
+
+ for(i = 4; i > 0; i--)
+ {
+ ch = nPick1(array, i);
+ links = get_kmer_right_cov(*node, ch);
+
+ if(!links)
+ continue;
+
+ *soFarLinks = linkCounter + links;
+ word = nextKmerLocal(currW, ch, WORDF);
+ traceAlongDBgraph(word, steps, min, max, num_route,
+ kset, kmerDest, num, overlap, WORDF,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq,
+ traceCounter, maxRoute, soFarNode, multiOccu,
+ soFarLinks, avgLinks);
+ }
+ }
+ else
+ {
+ int array[] = {0, 1, 2, 3};
+ // the node is stored under the reverse complement: use the left coverage and complement the base
+ for(i = 4; i > 0; i--)
+ {
+ ch = nPick1(array, i);
+ links = get_kmer_left_cov(*node, ch);
+
+ if(!links)
+ continue;
+
+ *soFarLinks = linkCounter + links;
+ word = nextKmerLocal(currW, int_comp(ch), WORDF);
+ traceAlongDBgraph(word, steps, min, max, num_route,
+ kset, kmerDest, num, overlap, WORDF,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq,
+ traceCounter, maxRoute, soFarNode, multiOccu,
+ soFarLinks, avgLinks);
+ }
+ }
+}
+
+static int searchFgap(KmerSet *kset, CTGinSCAF *ctg1, CTGinSCAF *ctg2, Kmer *kmerCtg1,
+ Kmer *kmerCtg2, unsigned int origOverlap, int overlap, DARRAY *gapSeqArray,
+ int len1, int len2, Kmer WordFilter, int *offset1, int *offset2, char *seqGap, int *cut1, int *cut2)
+{
+ // Search for a route through kset from a kmer of ctg1's end to a kmer of ctg2's end. Returns 1 when a gap sequence was stored in gapSeqArray, 3 when a repeat was met (partial result in seqGap/offset1/offset2/cut1/cut2), 0 otherwise.
+ int i;
+ int ret = 0;
+ kmer_t *node, **soFarNode;
+ int num_route;
+ int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
+ int min = gapLen - GLDiff > 0 ? gapLen - GLDiff : 0; //0531
+ int max = gapLen + GLDiff < 10 ? 10 : gapLen + GLDiff;
+ char **foundRoutes;
+ char *soFarSeq;
+ int traceCounter;
+ int *routeEndOnCtg2;
+ int *routeLens;
+ boolean *multiOccu;
+ long long soFarLinks;
+ double *avgLinks;
+
+ //mask linear internal linear kmer on contig1 end
+ routeEndOnCtg2 = (int *)ckalloc(MaxRouteNum * sizeof(int));
+ routeLens = (int *)ckalloc(MaxRouteNum * sizeof(int));
+ multiOccu = (boolean *)ckalloc(MaxRouteNum * sizeof(boolean));
+ short *MULTI1 = (short *)ckalloc(MaxRouteNum * sizeof(short));
+ short *MULTI2 = (short *)ckalloc(MaxRouteNum * sizeof(short));
+ soFarSeq = (char *)ckalloc(max * sizeof(char));
+ soFarNode = (kmer_t **)ckalloc((max + 1) * sizeof(kmer_t *));
+ foundRoutes = (char **)ckalloc(MaxRouteNum * sizeof(char *));;
+ avgLinks = (double *)ckalloc(MaxRouteNum * sizeof(double));;
+
+ for(i = 0; i < MaxRouteNum; i++)
+ foundRoutes[i] = (char *)ckalloc(max * sizeof(char));
+ // try the kmers of ctg1's end as start points, last one first
+ for(i = len1 - 1; i >= 0; i--)
+ {
+
+ num_route = traceCounter = soFarLinks = 0;
+ int steps = 0;
+ traceAlongDBgraph(kmerCtg1[i], steps, min, max, &num_route,
+ kset, kmerCtg2, len2, overlap, WordFilter,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq,
+ &traceCounter, MaxRouteNum, soFarNode, multiOccu,
+ &soFarLinks, avgLinks);
+
+ if(num_route > 0)
+ {
+ int m, minEnd = routeEndOnCtg2[0];
+
+ for(m = 0; m < num_route; m++)
+ {
+ if(routeLens[m] < 0)
+ continue;
+
+ if(routeEndOnCtg2[m] < minEnd)
+ minEnd = routeEndOnCtg2[m];
+ }
+
+ /* else if(minFreq>1){
+ for(m=0;m<num_route;m++){
+ if(routeEndOnCtg2[m]!=minEnd)
+ continue;
+ for(j=0;j<max;j++){
+ if(foundRoutes[m][j]>3)
+ break;
+ printf("%c",int2base((int)foundRoutes[m][j]));
+ }
+ printf(": %4.2f\n",avgLinks[m]);
+ }
+ } */
+ // second trace: collect the routes that end exactly on kmerCtg2[minEnd]
+ num_route = traceCounter = soFarLinks = 0;
+ steps = 0;
+ trace4Repeat(kmerCtg1[i], steps, min, max, &num_route,
+ kset, kmerCtg2[minEnd], overlap, WordFilter,
+ &traceCounter, MaxRouteNum, soFarNode, MULTI1, MULTI2,
+ routeLens, foundRoutes, soFarSeq, &soFarLinks, avgLinks);
+ int j, best = 0;
+ int maxLen = routeLens[0];
+ double maxLink = avgLinks[0];
+ char *pt;
+ boolean repeat = 0, sameLen = 1;
+ int leftMost = max, rightMost = max;
+
+ if(num_route < 1)
+ {
+ fprintf(stderr, "After trace4Repeat: non route was found\n");
+ continue;
+ }
+
+ if(num_route > 1)
+ {
+ // if multi paths are found, we check on the repeatative occurrences and links/length
+ for(m = 0; m < num_route; m++)
+ {
+ if(routeLens[m] < 0)
+ continue;
+
+ if(MULTI1[m] >= 0 && MULTI2[m] >= 0)
+ {
+ repeat = 1;
+ leftMost = leftMost > MULTI1[m] ? MULTI1[m] : leftMost;
+ rightMost = rightMost > MULTI2[m] ? MULTI2[m] : rightMost;
+ }
+
+ if(routeLens[m] != maxLen)
+ sameLen = 0;
+
+ if(routeLens[m] < maxLen)
+ maxLen = routeLens[m];
+
+ if(avgLinks[m] > maxLink)
+ {
+ maxLink = avgLinks[m];
+ best = m;
+ }
+ }
+ }
+
+ if(repeat)
+ {
+ *offset1 = *offset2 = *cut1 = *cut2 = 0;
+ int index = 0;
+ char ch;
+
+ for(j = 0; j < leftMost; j++)
+ {
+ if(routeLens[0] < j + overlap + 1)
+ break;
+ else
+ ch = foundRoutes[0][j];
+
+ for(m = 1; m < num_route; m++)
+ {
+ if(routeLens[m] < 0)
+ continue;
+
+ if(ch != foundRoutes[m][j])
+ break;
+ }
+
+ if(m == num_route)
+ seqGap[index++] = ch;
+ else break;
+ }
+
+ *offset1 = index;
+ index = 0;
+
+ for(j = 0; j < rightMost; j++)
+ {
+ if(routeLens[0] - overlap - 1 < j)
+ break;
+ else
+ ch = foundRoutes[0][routeLens[0] - overlap - 1 - j];
+
+ for(m = 1; m < num_route; m++)
+ {
+ if(routeLens[m] < 0)
+ continue;
+
+ if(ch != foundRoutes[m][routeLens[m] - overlap - 1 - j])
+ break;
+ }
+
+ if(m == num_route)
+ index++;
+ else break;
+ }
+
+ *offset2 = index;
+
+ for(j = 0; j < *offset2; j++)
+ seqGap[*offset1 + *offset2 - 1 - j] = foundRoutes[0][routeLens[0] - overlap - 1 - j];
+
+ if(*offset1 > 0 || *offset2 > 0)
+ {
+ *cut1 = len1 - i - 1;
+ *cut2 = minEnd;
+
+ //fprintf(stderr,"\n");
+ for(m = 0; m < num_route; m++)
+ {
+ for(j = 0; j < max; j++)
+ {
+ if(foundRoutes[m][j] > 3)
+ break;
+
+ //fprintf(stderr,"%c",int2base((int)foundRoutes[m][j]));
+ }
+
+ //fprintf(stderr,": %4.2f\n",avgLinks[m]);
+ }
+
+ /*
+ fprintf(stderr,">Gap (%d + %d) (%d + %d)\n",*offset1,*offset2,*cut1,*cut2);
+ for(index=0;index<*offset1+*offset2;index++)
+ fprintf(stderr,"%c",int2base(seqGap[index]));
+ fprintf(stderr,"\n"); */
+ }
+
+ ret = 3;
+ break;
+ }
+
+ if(overlap + (len1 - i - 1) + minEnd - routeLens[best] > (int)origOverlap)
+ continue;
+
+ ctg1->gapSeqOffset = gapSeqArray->item_c;
+ ctg1->gapSeqLen = routeLens[best];
+
+ if(!darrayPut(gapSeqArray, ctg1->gapSeqOffset + maxLen / 4))
+ continue;
+
+ pt = (char *)darrayPut(gapSeqArray, ctg1->gapSeqOffset);
+
+ /*
+ printKmer(stderr,kmerCtg1[i],overlap);
+ fprintf(stderr,"-");
+ */
+ for(j = 0; j < max; j++)
+ {
+ if(foundRoutes[best][j] > 3)
+ break;
+
+ writeChar2tightString(foundRoutes[best][j], pt, j);
+ //fprintf(stderr,"%c",int2base((int)foundRoutes[best][j]));
+ }
+
+ //fprintf(stderr,": GAPSEQ %d + %d, avglink %4.2f\n",len1-i-1,minEnd,avgLinks[best]);
+ ctg1->cutTail = len1 - i - 1;
+ ctg2->cutHead = overlap + minEnd;
+ ctg2->scaftig_start = 0;
+
+ ret = 1;
+ break;
+ /* }if(num_route>1){
+ ret = 2;
+ break; */
+ }
+ else //mark node which leads to dead end
+ {
+ node = searchNode(kmerCtg1[i], kset, overlap);
+
+ if(node)
+ node->twin = 2;
+ }
+
+ }
+
+ for(i = 0; i < MaxRouteNum; i++)
+ free((void *)foundRoutes[i]);
+
+ free((void *)soFarSeq);
+ free((void *)soFarNode);
+ free((void *)multiOccu);
+ free((void *)MULTI1);
+ free((void *)MULTI2);
+ free((void *)foundRoutes);
+ free((void *)routeEndOnCtg2);
+ free((void *)routeLens);
+ free((void *)avgLinks); // was allocated above but never released
+ return ret;
+}
+
+static void trace4Repeat(Kmer currW, int steps, int min, int max, int *num_route,
+ KmerSet *kset, Kmer kmerDest, int overlap, Kmer WORDF,
+ int *traceCounter, int maxRoute, kmer_t **soFarNode, short *multiOccu1, short *multiOccu2,
+ int *routeLens, char **foundRoutes, char *soFarSeq,
+ long long *soFarLinks, double *avgLinks)
+{ // Recursive walk through kset from currW that counts (and optionally records) routes of min..max steps ending on the single kmer kmerDest.
+ (*traceCounter)++;
+ // stop once the total number of calls exceeds UPlimit, the path is longer than max, or maxRoute routes exist
+ if(*traceCounter > UPlimit)
+ return;
+
+ if(steps > max || *num_route >= maxRoute)
+ return;
+ // the set is searched under the smaller of currW and its reverse complement
+ Kmer word = reverseComplement(currW, overlap);
+ boolean isSmaller = currW < word;
+ char ch;
+ unsigned char links;
+ int index, i;
+
+ if(isSmaller)
+ word = currW;
+
+ kmer_t *node;
+ boolean found = search_kmerset(kset, word, &node);
+
+ if(!found)
+ {
+ printf("Trace: can't find kmer %llx (rc %llx, input %llx) at step %d\n", word,
+ reverseComplement(word, overlap), currW, steps);
+ return;
+ }
+ // the output pointers are tested before use; maskRepeatNode passes NULL for all of them
+ if(soFarNode)
+ soFarNode[steps] = node;
+
+ if(soFarSeq && steps > 0)
+ soFarSeq[steps - 1] = currW & 0x03;
+
+ int linkCounter;
+
+ if(soFarLinks)
+ linkCounter = *soFarLinks;
+ // destination reached after at least min steps: record the route; unlike traceAlongDBgraph the walk then continues
+ if(steps >= min && currW == kmerDest)
+ {
+ index = *num_route;
+
+ if(avgLinks && steps > 0)
+ avgLinks[index] = (double)linkCounter / steps;
+ else if(avgLinks)
+ avgLinks[index] = 0;
+
+ //find node that appears more than once in the path
+ if(multiOccu1 && multiOccu2)
+ {
+ for(i = 0; i < steps + 1; i++)
+ soFarNode[i]->deleted = 0;
+
+ int rightMost = 0;
+ boolean MULTI = 0;
+
+ for(i = 0; i < steps + 1; i++)
+ {
+ if(soFarNode[i]->deleted)
+ {
+ rightMost = rightMost < i - 1 ? i - 1 : rightMost;
+ MULTI = 1;
+ }
+
+ soFarNode[i]->deleted = 1;
+ }
+ // -1 in both arrays marks a route on which no node occurs twice
+ if(!MULTI)
+ multiOccu1[index] = multiOccu2[index] = -1;
+ else
+ {
+ multiOccu2[index] = steps - 2 - rightMost < 0 ? 0 : steps - 2 - rightMost; //[0 steps-2]
+
+ for(i = 0; i < steps + 1; i++)
+ soFarNode[i]->deleted = 0;
+
+ int leftMost = steps - 2;
+
+ for(i = steps; i >= 0; i--)
+ {
+ if(soFarNode[i]->deleted)
+ leftMost = leftMost > i - 1 ? i - 1 : leftMost;
+
+ soFarNode[i]->deleted = 1;
+ }
+
+ multiOccu1[index] = leftMost < 0 ? 0 : leftMost; //[0 steps-2]
+ }
+ }
+
+ if(routeLens)
+ routeLens[index] = steps;
+
+ if(soFarSeq)
+ {
+ char *array = foundRoutes[index];
+
+ for(i = 0; i < steps; i++)
+ array[i] = soFarSeq[i];
+
+ if(i < max)
+ array[i] = 4; //indicate the end of the sequence
+ }
+
+ *num_route = ++index;
+ }
+ // extend by every base whose coverage on this node is non-zero
+ steps++;
+
+ if(isSmaller)
+ {
+ int array[] = {0, 1, 2, 3};
+
+ for(i = 4; i > 0; i--)
+ {
+ ch = nPick1(array, i);
+ links = get_kmer_right_cov(*node, ch);
+
+ if(!links)
+ continue;
+
+ if(soFarLinks)
+ *soFarLinks = linkCounter + links;
+
+ word = nextKmerLocal(currW, ch, WORDF);
+ trace4Repeat(word, steps, min, max, num_route,
+ kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
+ multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq,
+ soFarLinks, avgLinks);
+ }
+ }
+ else
+ {
+ int array[] = {0, 1, 2, 3};
+ // the node is stored under the reverse complement: use the left coverage and complement the base
+ for(i = 4; i > 0; i--)
+ {
+ ch = nPick1(array, i);
+ links = get_kmer_left_cov(*node, ch);
+
+ if(!links)
+ continue;
+
+ if(soFarLinks)
+ *soFarLinks = linkCounter + links;
+
+ word = nextKmerLocal(currW, int_comp(ch), WORDF);
+ trace4Repeat(word, steps, min, max, num_route,
+ kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
+ multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq,
+ soFarLinks, avgLinks);
+ }
+ }
+}
+
+//Mark repeat kmers on one contig end.
+//For each of the `len` kmers in `kmerCtg` the canonical form (the smaller of the
+//kmer and its reverse complement) is looked up in `kset`. Kmers that are not in
+//the set, or whose node is flagged `linear`, are skipped. For the rest,
+//trace4Repeat() is started from the kmer with the kmer itself as destination
+//(min 1 step, at most `max` steps, at most one route); if at least one route is
+//reported the node is flagged `checked`.
+static void maskRepeatOnCtgEnd(KmerSet *kset, Kmer *kmerCtg, int len,
+                               int overlap, int max, Kmer WordFilter)
+{
+    int i;
+    int min = 1, maxRoute = 1;
+
+    for(i = 0; i < len; i++)
+    {
+        Kmer word = kmerCtg[i];
+        Kmer bal_word = reverseComplement(word, overlap);
+        kmer_t *node;
+        boolean found;
+        int num_route = 0, traceCounter = 0;
+
+        //the kmer set is queried with the smaller of the two strands
+        if(word > bal_word)
+            word = bal_word;
+
+        found = search_kmerset(kset, word, &node);
+
+        if(!found || node->linear)
+            continue;
+
+        trace4Repeat(word, 0, min, max, &num_route,
+                     kset, word, overlap, WordFilter,
+                     &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+
+        if(num_route >= 1)
+            node->checked = 1;
+    }
+}
+
+//found repeat node on contig ends
+//Runs the repeat check over the kmers of both contig ends (kmerCtg1[0..len1) and
+//kmerCtg2[0..len2)) and flags the matching nodes in `kset` as `checked`.
+static void maskRepeatNode(KmerSet *kset, Kmer *kmerCtg1,
+                           Kmer *kmerCtg2, int overlap,
+                           int len1, int len2, int max, Kmer WordFilter)
+{
+    maskRepeatOnCtgEnd(kset, kmerCtg1, len1, overlap, max, WordFilter);
+    maskRepeatOnCtgEnd(kset, kmerCtg2, len2, overlap, max, WordFilter);
+}
+
+/*
+static boolean chopReadFillGap(int len_seq,int overlap,char *src_seq, char *bal_seq,
+ KmerSet *kset,Kmer WORDF,int *start,int *end,boolean *bal,
+ Kmer *KmerCtg1,int len1,Kmer *KmerCtg2,int len2,int *index1,int *index2)
+{
+ int index,j=0,bal_j;
+ Kmer word,bal_word;
+ int flag=0,bal_flag=0;
+ int ctg1start,bal_ctg1start,ctg2end,bal_ctg2end;
+ int seqStart,bal_start,seqEnd,bal_end;
+ kmer_t *node;
+ boolean found;
+
+ if(len_seq<overlap+1){
+ return 0;
+ }
+ word = 0;
+ for (index = 0;index<overlap;index++){
+ word <<= 2;
+ word += src_seq[index];
+ }
+ reverseComplementSeq(src_seq, len_seq,bal_seq);
+
+ // complementary node
+ bal_word = reverseComplement(word,overlap);
+ bal_j = len_seq-0-overlap; // 0;
+ flag = bal_flag = 0;
+ if(word<bal_word){
+ found = search_kmerset(kset,word,&node);
+ }else{
+ found = search_kmerset(kset,bal_word,&node);
+ }
+ if(found&&!node->linear&&!node->checked){
+ if(!flag&&node->inEdge==1){
+ ctg1start = searchKmerOnCtg(word,KmerCtg1,len1);
+ if(ctg1start>0){
+ flag = 1;
+ seqStart = j + overlap-1;
+ }
+ }
+ if(!bal_flag&&node->inEdge==2){
+ bal_ctg2end = searchKmerOnCtg(bal_word,KmerCtg2,len2);
+ if(bal_ctg2end>0){
+ bal_flag = 2;
+ bal_end = bal_j+overlap-1;
+ }
+ }
+ }
+
+ for(j = 1; j <= len_seq - overlap; j ++) {
+ word = nextKmerLocal(word,src_seq[j-1+overlap],WORDF);
+ bal_j = len_seq-j-overlap; // j;
+ bal_word = prevKmerLocal(bal_word,bal_seq[bal_j],overlap);
+
+ if(word<bal_word){
+ found = search_kmerset(kset,word,&node);
+ }else{
+ found = search_kmerset(kset,bal_word,&node);
+ }
+ if(found&&!node->linear&&!node->checked){
+ if(!flag&&node->inEdge==1){
+ ctg1start = searchKmerOnCtg(word,KmerCtg1,len1);
+ if(ctg1start>0){
+ flag = 1;
+ seqStart = j + overlap-1;
+ }
+ }else if(flag==1&&node->inEdge==1){
+ index = searchKmerOnCtg(word,KmerCtg1,len1);
+ if(index>ctg1start){ // choose hit closer to gap
+ ctg1start = index;
+ seqStart = j + overlap-1;
+ }
+ }else if(flag==1&&node->inEdge==2){
+ ctg2end = searchKmerOnCtg(word,KmerCtg2,len2);
+ if(ctg2end>0){
+ flag = 3;
+ seqEnd = j+overlap-1;
+ break;
+ }
+ }
+
+ if(!bal_flag&&node->inEdge==2){
+ bal_ctg2end = searchKmerOnCtg(bal_word,KmerCtg2,len2);
+ if(bal_ctg2end>0){
+ bal_flag = 2;
+ bal_end = bal_j+overlap-1;
+ }
+ }else if(bal_flag==2&&node->inEdge==2){
+ index = searchKmerOnCtg(bal_word,KmerCtg2,len2);
+ if(index<bal_ctg2end){ // choose hit closer to gap
+ index = bal_ctg2end;
+ bal_end = bal_j+overlap-1;
+ }
+ }else if(bal_flag==2&&node->inEdge==1){
+ bal_ctg1start = searchKmerOnCtg(bal_word,KmerCtg1,len1);
+ if(bal_ctg1start>0){
+ bal_flag = 3;
+ bal_start = bal_j+overlap-1;
+ break;
+ }
+ }
+ }
+ }
+ if(flag==3){
+ *start = seqStart;
+ *end = seqEnd;
+ *bal = 0;
+ *index1 = ctg1start;
+ *index2 = ctg2end;
+ return 1;
+ }else if(bal_flag==3){
+ *start = bal_start;
+ *end = bal_end;
+ *bal = 1;
+ *index1 = bal_ctg1start;
+ *index2 = bal_ctg2end;
+ return 1;
+ }
+ return 0;
+}
+
+static boolean readsCrossGap(READNEARBY *rdArray, int num, int originOverlap,DARRAY *gapSeqArray,
+ Kmer *kmerCtg1,Kmer *kmerCtg2,int overlap,int len1,int len2,
+ CTGinSCAF *ctg1,CTGinSCAF *ctg2,KmerSet *kmerS,Kmer WordFilter,int min,int max)
+{
+ int i,j,start,end,startOnCtg1,endOnCtg2;
+ char *bal_seq;
+ char *src_seq;
+ char *pt;
+ boolean bal,ret=0,FILL;
+
+ src_seq = (char *)ckalloc(maxReadLen*sizeof(char));
+ bal_seq = (char *)ckalloc(maxReadLen*sizeof(char));
+
+ for(i=0;i<num;i++){
+ getSeqFromRead(rdArray[i],src_seq);
+ FILL = chopReadFillGap(rdArray[i].len,overlap,src_seq,bal_seq,
+ kmerS,WordFilter,&start,&end,&bal,
+ kmerCtg1,len1,kmerCtg2,len2,&startOnCtg1,&endOnCtg2);
+
+ if(!FILL||(end-start)<min||(end-start)>max)
+ continue;
+ fprintf(stderr,"Read across\n");
+ //printf("Filled: K %d, ctg1 %d ctg2 %d,start %d end %d\n",overlap,startOnCtg1,endOnCtg2,start,end);
+ if(overlap+(len1-startOnCtg1-1)+endOnCtg2-(end-start)>(int)originOverlap)
+ continue; // contig1 and contig2 could not overlap more than origOverlap bases
+
+ ctg1->gapSeqOffset = gapSeqArray->item_c;
+ ctg1->gapSeqLen = end-start;
+ if(!darrayPut(gapSeqArray,ctg1->gapSeqOffset+(end-start)/4))
+ continue;
+ pt = (char *)darrayPut(gapSeqArray,ctg1->gapSeqOffset);
+ for(j=start+1;j<=end;j++){
+ if(bal)
+ writeChar2tightString(bal_seq[j],pt,j-start-1);
+ else
+ writeChar2tightString(src_seq[j],pt,j-start-1);
+
+ }
+ ctg1->cutTail = len1-startOnCtg1-1;
+ ctg2->cutHead = overlap + endOnCtg2;
+ ctg2->scaftig_start = 0;
+
+ ret = 1;
+ break;
+ }
+
+ free((void*)src_seq);
+ free((void*)bal_seq);
+ return ret;
+}
+*/
+// Forward declarations: both functions are defined further down in this file.
+// localGraph() below calls readsCrossGap(); its call to kmerSet_markTandem()
+// is currently commented out.
+static void kmerSet_markTandem(KmerSet *set, Kmer WordFilter, int overlap);
+static boolean readsCrossGap(READNEARBY *rdArray, int num, int originOverlap, DARRAY *gapSeqArray,
+ Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap,
+ CTGinSCAF *ctg1, CTGinSCAF *ctg2, KmerSet *kmerS, Kmer WordFilter, int min, int max,
+ int offset1, int offset2, char *seqGap, char *seqCtg1, char *seqCtg2, int cut1, int cut2);
+
+/*
+ * Try to close the gap between ctg1 and ctg2 with a local kmer graph.
+ *
+ * 1. Build a kmer set from the reads near the gap (readsInGap2DBgraph).
+ * 2. Search for a path between the two contig ends (searchFgap). A result of
+ *    0 or 1 is returned to the caller unchanged.
+ * 3. For any other result, mask repeat kmers on both contig ends and look for
+ *    a read that crosses the gap (readsCrossGap); its result is returned.
+ *
+ * The kmer set is released on every path.
+ */
+int localGraph(READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+               int origOverlap, Kmer *kmerCtg1, Kmer *kmerCtg2,
+               int overlap, DARRAY *gapSeqArray, char *seqCtg1, char *seqCtg2, char *seqGap)
+{
+    //mask keeping the low 2*overlap bits of a kmer
+    Kmer WordFilter = (((Kmer) 1) << (2 * overlap)) - 1;
+    KmerSet *kmerSet = readsInGap2DBgraph(rdArray, num, ctg1, ctg2, origOverlap,
+                                          kmerCtg1, kmerCtg2, overlap, WordFilter);
+    time_t tt;
+    time(&tt);
+
+    //expected gap length and the window [min, max] accepted for a crossing read
+    int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
+    int min = 0;
+    int max = 10;
+
+    if(gapLen - GLDiff > 0)
+        min = gapLen - GLDiff;
+
+    if(!(gapLen + GLDiff < 10))
+        max = gapLen + GLDiff;
+
+    //number of kmers taken from each contig end
+    int len1, len2;
+
+    if(CTGendLen < contig_array[ctg1->ctgID].length + origOverlap)
+        len1 = CTGendLen;
+    else
+        len1 = contig_array[ctg1->ctgID].length + origOverlap;
+
+    if(CTGendLen < contig_array[ctg2->ctgID].length + origOverlap)
+        len2 = CTGendLen;
+    else
+        len2 = contig_array[ctg2->ctgID].length + origOverlap;
+
+    len1 -= overlap - 1;
+    len2 -= overlap - 1;
+
+    int offset1 = 0, offset2 = 0, cut1 = 0, cut2 = 0;
+    int pathNum = searchFgap(kmerSet, ctg1, ctg2, kmerCtg1, kmerCtg2,
+                             origOverlap, overlap, gapSeqArray,
+                             len1, len2, WordFilter, &offset1, &offset2, seqGap, &cut1, &cut2);
+
+    if(pathNum == 0 || pathNum == 1)
+    {
+        free_kmerset(kmerSet);
+        return pathNum;
+    }
+
+    /******************* cross the gap by single reads *********/
+    maskRepeatNode(kmerSet, kmerCtg1, kmerCtg2, overlap,
+                   len1, len2, max, WordFilter);
+    boolean found = readsCrossGap(rdArray, num, origOverlap, gapSeqArray,
+                                  kmerCtg1, kmerCtg2, overlap, ctg1, ctg2, kmerSet, WordFilter, min, max,
+                                  offset1, offset2, seqGap, seqCtg1, seqCtg2, cut1, cut2);
+
+    free_kmerset(kmerSet);
+    return found;
+}
+
+//Walk every occupied slot of the kmer set. Entries with inEdge > 0 are left
+//alone; for the others trace4Repeat() is started from the kmer with the kmer
+//itself as destination (1..overlap steps, one route at most) and the entry is
+//flagged `checked` when a route is reported.
+static void kmerSet_markTandem(KmerSet *set, Kmer WordFilter, int overlap)
+{
+    int minSteps = 1, maxSteps = overlap, routeCap = 1;
+
+    for(set->iter_ptr = 0; set->iter_ptr < set->size; set->iter_ptr++)
+    {
+        kmer_t *entry;
+        int routes = 0, traced = 0;
+
+        if(is_kmer_entity_null(set->flags, set->iter_ptr))
+            continue;
+
+        entry = set->array + set->iter_ptr;
+
+        if(entry->inEdge > 0)
+            continue;
+
+        trace4Repeat(entry->seq, 0, minSteps, maxSteps, &routes,
+                     set, entry->seq, overlap, WordFilter,
+                     &traced, routeCap, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+
+        if(routes >= 1)
+            entry->checked = 1;
+    }
+}
+/******************* the following is for read-crossing gaps *************************/
+
+// Largest sequence length compareSequences() accepts; also sizes the buffers below.
+#define MAXREADLENGTH 100
+
+// Score added for a gap step in the alignment recurrence (0: gaps cost nothing).
+static const int INDEL = 0;
+// Substitution score indexed [base1][base2]: 1 for identical codes 0..3, 0 otherwise.
+static const int SIM[4][4] =
+{
+ {1, 0, 0, 0},
+ {0, 1, 0, 0},
+ {0, 0, 1, 0},
+ {0, 0, 0, 1}
+};
+// Scratch buffers holding the two sequences being compared.
+static char fastSequence[MAXREADLENGTH];
+static char slowSequence[MAXREADLENGTH];
+
+// Dynamic-programming score table filled by compareSequences() and read back
+// by mapSlowOntoFast(); row/column 0 is the empty-prefix border.
+static int Fmatrix[MAXREADLENGTH + 1][MAXREADLENGTH + 1];
+// Position maps written by mapSlowOntoFast().
+static int slowToFastMapping[MAXREADLENGTH + 1];
+static int fastToSlowMapping[MAXREADLENGTH + 1];
+
+//Return the largest of the three arguments.
+static int max(int A, int B, int C)
+{
+    int best = A;
+
+    if(B > best)
+        best = B;
+
+    if(C > best)
+        best = C;
+
+    return best;
+}
+
+//Score a global alignment of sequence1[0..length1) against sequence2[0..length2)
+//with the SIM/INDEL scores, filling Fmatrix as a side effect.
+//Returns Fmatrix[length1][length2], or 0 without touching Fmatrix when either
+//length is outside 1..MAXREADLENGTH.
+static int compareSequences(char *sequence1, char *sequence2, int length1, int length2)
+{
+    int row, col;
+
+    if(length1 < 1 || length2 < 1 || length1 > MAXREADLENGTH || length2 > MAXREADLENGTH)
+        return 0;
+
+    //border: aligning against an empty prefix scores 0
+    for(row = 0; row <= length1; row++)
+        Fmatrix[row][0] = 0;
+
+    for(col = 0; col <= length2; col++)
+        Fmatrix[0][col] = 0;
+
+    for(row = 1; row <= length1; row++)
+    {
+        int base1 = (int) sequence1[row - 1];
+
+        for(col = 1; col <= length2; col++)
+        {
+            int diagonal = Fmatrix[row - 1][col - 1] + SIM[base1][(int) sequence2[col - 1]];
+            int fromAbove = Fmatrix[row - 1][col] + INDEL;
+            int fromLeft = Fmatrix[row][col - 1] + INDEL;
+
+            Fmatrix[row][col] = max(diagonal, fromAbove, fromLeft);
+        }
+    }
+
+    return Fmatrix[length1][length2];
+}
+
+//Trace back through Fmatrix (as filled by compareSequences(fastSequence,
+//slowSequence, ...)) and record, for every position of one sequence, the
+//position it is aligned to in the other:
+//  slowToFastMapping[s] = position in the fast sequence for slow position s
+//  fastToSlowMapping[f] = position in the slow sequence for fast position f
+//Positions left over at the front once the other sequence is exhausted are
+//mapped to -1. The end sentinels map the two lengths onto each other.
+//Aborts the program if no predecessor cell explains the current score.
+static void mapSlowOntoFast(int slowSeqLength, int fastSeqLength)
+{
+    int slowIndex = slowSeqLength;
+    int fastIndex = fastSeqLength;
+    int fastn, slown;
+
+    if (slowIndex == 0)
+    {
+        slowToFastMapping[0] = fastIndex;
+
+        while (fastIndex >= 0)
+            fastToSlowMapping[fastIndex--] = 0;
+
+        return;
+    }
+
+    if (fastIndex == 0)
+    {
+        //record the slow length BEFORE the loop counts slowIndex down to -1;
+        //mirrors the slowIndex == 0 branch above and the end sentinels below
+        fastToSlowMapping[0] = slowIndex;
+
+        while (slowIndex >= 0)
+            slowToFastMapping[slowIndex--] = 0;
+
+        return;
+    }
+
+    while (slowIndex > 0 && fastIndex > 0)
+    {
+        fastn = (int) fastSequence[fastIndex - 1];
+        slown = (int) slowSequence[slowIndex - 1];
+
+        if (Fmatrix[fastIndex][slowIndex] ==
+                Fmatrix[fastIndex - 1][slowIndex - 1] +
+                SIM[fastn][slown])
+        {
+            //diagonal step: the two positions are aligned to each other
+            fastToSlowMapping[--fastIndex] = --slowIndex;
+            slowToFastMapping[slowIndex] = fastIndex;
+        }
+        else if (Fmatrix[fastIndex][slowIndex] ==
+                 Fmatrix[fastIndex - 1][slowIndex] + INDEL)
+            //step along the fast sequence only
+            fastToSlowMapping[--fastIndex] = slowIndex - 1;
+
+        else if (Fmatrix[fastIndex][slowIndex] ==
+                 Fmatrix[fastIndex][slowIndex - 1] + INDEL)
+            //step along the slow sequence only
+            slowToFastMapping[--slowIndex] = fastIndex - 1;
+
+        else
+        {
+            printf("compareSequence: Error trace\n");
+            fflush(stdout);
+            abort();
+        }
+    }
+
+    while (slowIndex > 0)
+        slowToFastMapping[--slowIndex] = -1;
+
+    while (fastIndex > 0)
+        fastToSlowMapping[--fastIndex] = -1;
+
+    slowToFastMapping[slowSeqLength] =
+        fastSeqLength;
+    fastToSlowMapping[fastSeqLength] =
+        slowSeqLength;
+}
+
+// Decide whether a read spans the gap, i.e. contains a kmer from KmerCtg1
+// followed by a kmer from KmerCtg2.
+// Every kmer of src_seq is visited once; for each one the reverse-complement
+// kmer is maintained too, so both strands are tested in the same pass
+// (bal_seq receives the reverse complement of src_seq).
+// Only nodes that are in kset and are neither `linear` nor `checked` count.
+// node->inEdge == 1 leads to a lookup in KmerCtg1, inEdge == 2 to a lookup in
+// KmerCtg2 (presumably the tag set when the contig ends were loaded - confirm).
+// A searchKmerOnCtg() result >= 0 is treated as a hit.
+//   forward strand: flag goes 0 -> 1 (ctg1 hit) -> 3 (ctg2 hit, stop)
+//   reverse strand: bal_flag goes 0 -> 2 (ctg2 hit) -> 3 (ctg1 hit, stop)
+// On success returns 1 and fills *start / *end (read coordinates of the last
+// base of the two hit kmers), *bal (1 if the reverse strand matched) and
+// *index1 / *index2 (hit positions on KmerCtg1 / KmerCtg2). Returns 0
+// otherwise, including when the read is shorter than overlap + 1.
+static boolean chopReadFillGap(int len_seq, int overlap, char *src_seq, char *bal_seq,
+ KmerSet *kset, Kmer WORDF, int *start, int *end, boolean *bal,
+ Kmer *KmerCtg1, int len1, Kmer *KmerCtg2, int len2, int *index1, int *index2)
+{
+ int index, j = 0, bal_j;
+ Kmer word, bal_word;
+ int flag = 0, bal_flag = 0;
+ int ctg1start, bal_ctg1start, ctg2end, bal_ctg2end;
+ int seqStart, bal_start, seqEnd, bal_end;
+ kmer_t *node;
+ boolean found;
+
+ if(len_seq < overlap + 1)
+ {
+ return 0;
+ }
+
+ word = 0;
+
+ // first kmer of the read: 2 bits per base
+ for (index = 0; index < overlap; index++)
+ {
+ word <<= 2;
+ word += src_seq[index];
+ }
+
+ reverseComplementSeq(src_seq, len_seq, bal_seq);
+
+ // complementary node
+ bal_word = reverseComplement(word, overlap);
+ bal_j = len_seq - 0 - overlap; // 0;
+ flag = bal_flag = 0;
+
+ // the set is queried with the smaller of the two strands
+ if(word < bal_word)
+ {
+ found = search_kmerset(kset, word, &node);
+ }
+ else
+ {
+ found = search_kmerset(kset, bal_word, &node);
+ }
+
+ // kmer at read position 0 (j == 0 here)
+ if(found && !node->linear && !node->checked)
+ {
+ if(!flag && node->inEdge == 1)
+ {
+ ctg1start = searchKmerOnCtg(word, KmerCtg1, len1);
+
+ if(ctg1start >= 0)
+ {
+ flag = 1;
+ seqStart = j + overlap - 1;
+ }
+ }
+
+ if(!bal_flag && node->inEdge == 2)
+ {
+ bal_ctg2end = searchKmerOnCtg(bal_word, KmerCtg2, len2);
+
+ if(bal_ctg2end >= 0)
+ {
+ bal_flag = 2;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ }
+
+ // remaining kmers: slide one base at a time on both strands
+ for(j = 1; j <= len_seq - overlap; j ++)
+ {
+ word = nextKmerLocal(word, src_seq[j - 1 + overlap], WORDF);
+ bal_j = len_seq - j - overlap; // j;
+ bal_word = prevKmerLocal(bal_word, bal_seq[bal_j], overlap);
+
+ if(word < bal_word)
+ {
+ found = search_kmerset(kset, word, &node);
+ }
+ else
+ {
+ found = search_kmerset(kset, bal_word, &node);
+ }
+
+ if(found && !node->linear && !node->checked)
+ {
+ // forward strand
+ if(!flag && node->inEdge == 1)
+ {
+ ctg1start = searchKmerOnCtg(word, KmerCtg1, len1);
+
+ if(ctg1start >= 0)
+ {
+ flag = 1;
+ seqStart = j + overlap - 1;
+ }
+ }
+ else if(flag == 1 && node->inEdge == 1)
+ {
+ index = searchKmerOnCtg(word, KmerCtg1, len1);
+
+ if(index >= 0 && index > ctg1start) // choose hit closer to gap
+ {
+ ctg1start = index;
+ seqStart = j + overlap - 1;
+ }
+ }
+ else if(flag == 1 && node->inEdge == 2)
+ {
+ ctg2end = searchKmerOnCtg(word, KmerCtg2, len2);
+
+ if(ctg2end >= 0)
+ {
+ flag = 3;
+ seqEnd = j + overlap - 1;
+ break;
+ }
+ }
+
+ // reverse strand: positions run backwards, so contig 2 is met first
+ if(!bal_flag && node->inEdge == 2)
+ {
+ bal_ctg2end = searchKmerOnCtg(bal_word, KmerCtg2, len2);
+
+ if(bal_ctg2end >= 0)
+ {
+ bal_flag = 2;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ else if(bal_flag == 2 && node->inEdge == 2)
+ {
+ index = searchKmerOnCtg(bal_word, KmerCtg2, len2);
+
+ if(index >= 0 && index < bal_ctg2end) // choose hit closer to gap
+ {
+ bal_ctg2end = index;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ else if(bal_flag == 2 && node->inEdge == 1)
+ {
+ bal_ctg1start = searchKmerOnCtg(bal_word, KmerCtg1, len1);
+
+ if(bal_ctg1start >= 0)
+ {
+ bal_flag = 3;
+ bal_start = bal_j + overlap - 1;
+ break;
+ }
+ }
+ }
+ }
+
+ // a forward-strand crossing takes precedence over a reverse-strand one
+ if(flag == 3)
+ {
+ *start = seqStart;
+ *end = seqEnd;
+ *bal = 0;
+ *index1 = ctg1start;
+ *index2 = ctg2end;
+ return 1;
+ }
+ else if(bal_flag == 3)
+ {
+ *start = bal_start;
+ *end = bal_end;
+ *bal = 1;
+ *index1 = bal_ctg1start;
+ *index2 = bal_ctg2end;
+ return 1;
+ }
+
+ return 0;
+}
+
+
+//Copy bases start..end (inclusive, clamped to 0..length-1) out of a packed
+//sequence into src_seq. With revS set, the bases are read from the mirrored
+//positions and complemented, i.e. the same window on the opposite strand.
+//Returns end - start + 1 computed from the clamped bounds.
+static int cutSeqFromTightStr(char *tightStr, int length, int start, int end, int revS, char *src_seq)
+{
+    int pos, at, filled = 0;
+
+    if(end >= length)
+        end = length - 1;
+
+    if(start < 0)
+        start = 0;
+
+    for(pos = start; pos <= end; pos++)
+    {
+        if(revS)
+        {
+            at = length - 1 - pos;
+            src_seq[filled++] = int_comp(getCharInTightString(tightStr, at));
+        }
+        else
+        {
+            at = pos;
+            src_seq[filled++] = getCharInTightString(tightStr, at);
+        }
+    }
+
+    return end - start + 1;
+}
+
+//Extract bases start..end of contig ctgID into `sequence`. The contig is treated
+//as contig_array[ctgID].length + originOverlap bases long. When the contig has
+//no stored sequence, the twin contig's sequence is read in reverse-complement
+//mode instead. Returns the value of cutSeqFromTightStr(), or 0 for a contig
+//whose length is below 1.
+static int cutSeqFromCtg(unsigned int ctgID, int start, int end, char *sequence, int originOverlap)
+{
+    unsigned int twin = getTwinCtg(ctgID);
+    int fullLen;
+
+    if(contig_array[ctgID].length < 1)
+        return 0;
+
+    fullLen = contig_array[ctgID].length + originOverlap;
+
+    if(!contig_array[ctgID].seq)
+        return cutSeqFromTightStr(contig_array[twin].seq, fullLen, start, end, 1, sequence);
+
+    return cutSeqFromTightStr(contig_array[ctgID].seq, fullLen, start, end, 0, sequence);
+}
+
+//Copy src_seq[start..end] (inclusive, clamped to 0..length-1) into `sequence`.
+//A request running past the end of the read is reported on stdout before it is
+//clamped. Returns end - start + 1 computed from the clamped bounds.
+static int cutSeqFromRead(char *src_seq, int length, int start, int end, char *sequence)
+{
+    int pos, copied = 0;
+
+    if(end >= length)
+    {
+        printf("******: end %d length %d\n", end, length);
+        end = length - 1;
+    }
+
+    if(start < 0)
+        start = 0;
+
+    for(pos = start; pos <= end; pos++)
+        sequence[copied++] = src_seq[pos];
+
+    return end - start + 1;
+}
+
+//Write the first `len` base codes of `seq` to `fo` as characters (via int2base),
+//followed by a newline.
+static void printSeq(FILE *fo, char *seq, int len)
+{
+    int pos = 0;
+
+    while(pos < len)
+    {
+        fprintf(fo, "%c", int2base((int)seq[pos]));
+        pos++;
+    }
+
+    fprintf(fo, "\n");
+}
+
+/*
+ * Try to fill the gap between ctg1 and ctg2 with a single read that crosses it.
+ *
+ * seqCtg1 / seqCtg2 are rebuilt as the flanks of the still-uncertain region:
+ * contig bases (after removing cut1 / cut2 bases) extended by the offset1 /
+ * offset2 gap bases in seqGap. kmerCtg1 / kmerCtg2 are re-chopped from those
+ * flanks. Every read is then tested with chopReadFillGap(); a read is a
+ * candidate when the distance between its two hits lies in [min, max] and does
+ * not imply an overlap of more than originOverlap bases. Each candidate is
+ * scored as matched / aligned bases over four windows (left and right of the
+ * hit kmer on each flank) using compareSequences().
+ *
+ * If the best score exceeds 0.9, the gap sequence (left remainder from seqGap,
+ * the read segment between the hits, right remainder from seqGap) is stored in
+ * gapSeqArray, ctg1->gapSeqOffset/gapSeqLen/cutTail and
+ * ctg2->cutHead/scaftig_start are updated, the sequence is echoed to stderr,
+ * and 1 is returned. Otherwise 0 is returned.
+ */
+static boolean readsCrossGap(READNEARBY *rdArray, int num, int originOverlap, DARRAY *gapSeqArray,
+                             Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap,
+                             CTGinSCAF *ctg1, CTGinSCAF *ctg2, KmerSet *kmerS, Kmer WordFilter, int min, int max,
+                             int offset1, int offset2, char *seqGap, char *seqCtg1, char *seqCtg2, int cut1, int cut2)
+{
+    int i, j, start, end, startOnCtg1, endOnCtg2;
+    char *bal_seq;
+    char *src_seq;
+    char *pt;
+    boolean bal, ret = 0, FILL;
+    double maxScore = 0.0;
+    int maxIndex = -1;  //index of the best-scoring read; -1 until a candidate is seen
+    int lenCtg1, lenCtg2;
+    //build sequences on left and right of the uncertain region
+    int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
+    int length = contig_array[ctg1->ctgID].length + originOverlap;
+
+    if(buffer_size > offset1)
+    {
+        //tail of contig 1 followed by the first offset1 gap bases
+        lenCtg1 = cutSeqFromCtg(ctg1->ctgID, length - cut1 - (buffer_size - offset1), length - 1 - cut1, seqCtg1, originOverlap);
+
+        for(i = 0; i < offset1; i++)
+            seqCtg1[lenCtg1 + i] = seqGap[i];
+
+        lenCtg1 += offset1;
+    }
+    else
+    {
+        //the gap bases alone fill the buffer: keep the last buffer_size of them
+        for(i = offset1 - buffer_size; i < offset1; i++)
+            seqCtg1[i + buffer_size - offset1] = seqGap[i];
+
+        lenCtg1 = buffer_size;
+    }
+
+    if(buffer_size > offset2)
+    {
+        //offset2 gap bases followed by the head of contig 2
+        lenCtg2 = cutSeqFromCtg(ctg2->ctgID, cut2, buffer_size - offset2 - 1 + cut2, &(seqCtg2[offset2]), originOverlap);
+
+        for(i = 0; i < offset2; i++)
+            seqCtg2[i] = seqGap[i + offset1];
+
+        lenCtg2 += offset2;
+    }
+    else
+    {
+        for(i = 0; i < buffer_size; i++)
+            seqCtg2[i] = seqGap[i + offset1];
+
+        lenCtg2 = buffer_size;
+    }
+
+    //chop kmer from both ends of the uncertain region
+    int len1, len2;
+    len1 = CTGendLen < lenCtg1 ? CTGendLen : lenCtg1;
+    len2 = CTGendLen < lenCtg2 ? CTGendLen : lenCtg2;
+    chopKmer4Ctg(kmerCtg1, len1, overlap, &(seqCtg1[lenCtg1 - len1]), WordFilter);
+    chopKmer4Ctg(kmerCtg2, len2, overlap, seqCtg2, WordFilter);
+    //from here on len1/len2 count kmers, not bases
+    len1 -= overlap - 1;
+    len2 -= overlap - 1;
+
+    src_seq = (char *)ckalloc(maxReadLen * sizeof(char));
+    bal_seq = (char *)ckalloc(maxReadLen * sizeof(char));
+
+    //per-read results of chopReadFillGap(), valid only for candidate reads
+    int *START = (int *)ckalloc(num * sizeof(int));
+    int *END = (int *)ckalloc(num * sizeof(int));
+    int *INDEX1 = (int *)ckalloc(num * sizeof(int));
+    int *INDEX2 = (int *)ckalloc(num * sizeof(int));
+    double *SCORE = (double *)ckalloc(num * sizeof(double));
+    boolean *BAL = (boolean *)ckalloc(num * sizeof(boolean));
+    memset(SCORE, 0, num * sizeof(double));
+
+    for(i = 0; i < num; i++)
+    {
+        getSeqFromRead(rdArray[i], src_seq);
+        FILL = chopReadFillGap(rdArray[i].len, overlap, src_seq, bal_seq,
+                               kmerS, WordFilter, &start, &end, &bal,
+                               kmerCtg1, len1, kmerCtg2, len2, &startOnCtg1, &endOnCtg2);
+
+        if(!FILL || (end - start) < min || (end - start) > max)
+            continue;
+
+        if(overlap + (len1 - startOnCtg1 - 1) + endOnCtg2 - (end - start) > (int)originOverlap)
+            continue;  // contig1 and contig2 could not overlap more than origOverlap bases
+
+        START[i] = start;
+        END[i] = end;
+        INDEX1[i] = startOnCtg1;
+        INDEX2[i] = endOnCtg2;
+        BAL[i] = bal;
+
+        //the two hit kmers themselves count as matched bases
+        int matchLen = 2 * overlap < (end - start + overlap) ? 2 * overlap : (end - start + overlap);
+        int match;
+        int alignLen = matchLen;
+        //compare the left of hit kmer on ctg1
+        int ctgLeft = (lenCtg1) - (len1 + overlap - 1) + startOnCtg1;
+        int readLeft = start - overlap + 1;
+        int cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
+
+        cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+        cutSeqFromRead(seqCtg1, lenCtg1, ctgLeft - cmpLen, ctgLeft - 1, fastSequence);
+
+        if(!bal)
+            cutSeqFromRead(src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence);
+        else
+            cutSeqFromRead(bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence);
+
+        match = compareSequences(fastSequence, slowSequence, cmpLen, cmpLen);
+
+        alignLen += cmpLen;
+        matchLen += match;
+
+        //compare the right of hit kmer on ctg1
+        int ctgRight = len1 - startOnCtg1 - 1;
+
+        cmpLen = ctgRight < (rdArray[i].len - start - 1) ? ctgRight : (rdArray[i].len - start - 1);
+        cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+        cutSeqFromRead(seqCtg1, lenCtg1, ctgLeft + overlap, ctgLeft + overlap + cmpLen - 1, fastSequence);
+
+        if(!bal)
+            cutSeqFromRead(src_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence);
+        else
+            cutSeqFromRead(bal_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence);
+
+        match = compareSequences(fastSequence, slowSequence, cmpLen, cmpLen);
+
+        alignLen += cmpLen;
+        matchLen += match;
+
+        //compare the left of hit kmer on ctg2
+        ctgLeft = endOnCtg2;
+        readLeft = end - overlap + 1;
+        cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
+        //clamp the window itself (not ctgLeft) so the min() above is kept and
+        //both buffers are filled with exactly cmpLen fresh bases
+        cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+        cutSeqFromRead(seqCtg2, lenCtg2, endOnCtg2 - cmpLen, endOnCtg2 - 1, fastSequence);
+
+        if(!bal)
+            cutSeqFromRead(src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence);
+        else
+            cutSeqFromRead(bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence);
+
+        match = compareSequences(fastSequence, slowSequence, cmpLen, cmpLen);
+        alignLen += cmpLen;
+        matchLen += match;
+
+        //compare the right of hit kmer on ctg2
+        ctgRight = lenCtg2 - endOnCtg2 - overlap;
+        cmpLen = ctgRight < (rdArray[i].len - end - 1) ? ctgRight : (rdArray[i].len - end - 1);
+        cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+        cutSeqFromRead(seqCtg2, lenCtg2, endOnCtg2 + overlap, endOnCtg2 + overlap + cmpLen - 1, fastSequence);
+
+        if(!bal)
+            cutSeqFromRead(src_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence);
+        else
+            cutSeqFromRead(bal_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence);
+
+        match = compareSequences(fastSequence, slowSequence, cmpLen, cmpLen);
+        alignLen += cmpLen;
+        matchLen += match;
+
+        double score = (double)matchLen / alignLen;
+
+        if(maxScore < score)
+        {
+            maxScore = score;
+            maxIndex = i;
+        }
+
+        SCORE[i] = score;
+    }
+
+    if(maxScore > 0.9 && maxIndex >= 0)
+    {
+        getSeqFromRead(rdArray[maxIndex], src_seq);
+        reverseComplementSeq(src_seq, rdArray[maxIndex].len, bal_seq);
+
+        //gap bases from the path search that the read does not cover
+        int leftRemain = offset1 - (len1 - INDEX1[maxIndex] - 1) > 0 ? offset1 - (len1 - INDEX1[maxIndex] - 1) : 0;
+        int rightRemain = offset2 - (overlap + INDEX2[maxIndex]) > 0 ? offset2 - (overlap + INDEX2[maxIndex]) : 0;
+
+        ctg1->gapSeqOffset = gapSeqArray->item_c;
+        ctg1->gapSeqLen = END[maxIndex] - START[maxIndex] + leftRemain + rightRemain;
+
+        //reserve room for the whole gap sequence (4 bases per byte) first
+        if(darrayPut(gapSeqArray, ctg1->gapSeqOffset + (END[maxIndex] - START[maxIndex] + leftRemain + rightRemain) / 4))
+        {
+            pt = (char *)darrayPut(gapSeqArray, ctg1->gapSeqOffset);
+
+            for(j = 0; j < leftRemain; j++)  //get the left side of the gap region from search
+            {
+                writeChar2tightString(seqGap[j], pt, j);
+                fprintf(stderr, "%c", int2base(seqGap[j]));
+            }
+
+            for(j = START[maxIndex] + 1; j <= END[maxIndex]; j++)
+            {
+                if(BAL[maxIndex])
+                {
+                    writeChar2tightString(bal_seq[j], pt, j - START[maxIndex] - 1 + leftRemain);
+                    fprintf(stderr, "%c", int2base(bal_seq[j]));
+                }
+                else
+                {
+                    writeChar2tightString(src_seq[j], pt, j - START[maxIndex] - 1 + leftRemain);
+                    fprintf(stderr, "%c", int2base(src_seq[j]));
+                }
+            }
+
+            for(j = offset2 - rightRemain; j < offset2; j++)  //get the right side of the gap region from search
+            {
+                writeChar2tightString(seqGap[j + leftRemain], pt, j + END[maxIndex] - START[maxIndex] + leftRemain);
+                fprintf(stderr, "%c", int2base(seqGap[j + leftRemain]));
+            }
+
+            fprintf(stderr, ": GAPSEQ (%d+%d)(%d+%d)(%d+%d)(%d+%d) B %d\n", offset1, offset2, cut1, cut2,
+                    len1 - INDEX1[maxIndex] - 1, INDEX2[maxIndex], START[maxIndex], END[maxIndex], BAL[maxIndex]);
+
+            ctg1->cutTail = len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 > cut1 ? len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 : cut1;
+            ctg2->cutHead = overlap + INDEX2[maxIndex] - offset2 + cut2 > cut2 ? overlap + INDEX2[maxIndex] - offset2 + cut2 : cut2;
+            ctg2->scaftig_start = 0;
+            ret = 1;
+        }
+    }
+
+    free((void *)START);
+    free((void *)END);
+    free((void *)INDEX1);
+    free((void *)INDEX2);
+    free((void *)SCORE);
+    free((void *)BAL);
+
+    free((void *)src_seq);
+    free((void *)bal_seq);
+    return ret;
+}
+
diff --git a/fusion/main.c b/fusion/main.c
new file mode 100644
index 0000000..def4582
--- /dev/null
+++ b/fusion/main.c
@@ -0,0 +1,210 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "global.h"
+
+
+extern int call_scaffold();
+extern int call_align();
+extern int call_bundle();
+extern int data_prepare();
+
+#define MAPPING 0
+#define SCAFF 1
+#define BUNDLE 2
+#define PREPARE 3
+#define POTENT 4
+static void usage();
+//Copy a command-line argument into a freshly allocated buffer. The buffer is
+//never smaller than the 256 bytes previously allocated, and grows for longer
+//arguments so the copy cannot overflow.
+static char *dupOptionArg(const char *arg)
+{
+    size_t need = strlen(arg) + 1;
+    char *copy = (char *)ckalloc((need < 256 ? 256 : need) * sizeof(char));
+
+    strcpy(copy, arg);
+    return copy;
+}
+
+//Entry point: parse the options, then run the mode selected with -M, -S, -B,
+//-D or -O. Prints the usage text and exits with status 1 when no mode was
+//chosen or a file argument required by the mode (-g, and -s / -c where
+//applicable) is missing. -h prints the usage text and exits with status 0.
+int main(int argc, char **argv)
+{
+    printf("Mapping & Scaffolding module.\n");
+
+    if(argc == 1)
+    {
+        usage();
+        return 0;
+    }
+
+    int c = 0;
+    int inpseq = 0, outseq = 0;  //set to 1 once -s / -g has been given
+    int mode = -1;
+
+    while((c = getopt(argc, argv, "s:g:p:L:t:i:u:c:P:K:MSBDOh")) != -1)
+    {
+        switch(c)
+        {
+            case 'M':
+                mode = MAPPING;
+                break;
+
+            case 'S':
+                mode = SCAFF;
+                break;
+
+            case 'B':
+                mode = BUNDLE;
+                break;
+
+            case 'D':
+                mode = PREPARE;
+                break;
+
+            case 'O':
+                mode = POTENT;
+                break;
+
+            case 's':
+                inpseq = 1;
+                shortrdsfile = dupOptionArg(optarg);
+                break;
+
+            case 'g':
+                outseq = 1;
+                graphfile = dupOptionArg(optarg);
+                break;
+
+            case 'p':
+                thrd_num = atoi(optarg);
+                break;
+
+            case 'L':
+                ctg_short = atoi(optarg);
+                break;
+
+            case 'P':
+                OverlapPercent = atof (optarg);
+                break;
+
+            case 't':
+                close_threshold = atof (optarg);
+                break;
+
+            case 'i':
+                ins_size_var = atoi (optarg);
+                break;
+
+            case 'u':
+                bund_threshold = atoi (optarg);
+                break;
+
+            case 'c':
+                ctg_file = dupOptionArg(optarg);
+                break;
+
+            case 'K':
+                overlaplen = atoi(optarg);
+                break;
+
+            case 'h':
+                usage();
+                return 0;
+
+            case '?':
+            default:
+                usage();
+                exit(1);
+        }
+    }
+
+    if(mode == -1)
+    {
+        usage();
+        exit(1);
+    }
+    else if(mode == MAPPING)
+    {
+        printf("[%s]Mapping mode selected .\n", __FUNCTION__);
+
+        if(outseq == 0 || inpseq == 0)
+        {
+            usage();
+            exit(1);
+        }
+
+        call_align();
+    }
+    else if(mode == SCAFF)
+    {
+        printf("[%s]Scaffolding mode selected .\n", __FUNCTION__);
+
+        if(outseq == 0)
+        {
+            usage();
+            exit(1);
+        }
+
+        call_scaffold();
+    }
+    else if(mode == BUNDLE)
+    {
+        printf("[%s]Bundling mode selected .\n", __FUNCTION__);
+
+        if(outseq == 0)
+        {
+            usage();
+            exit(1);
+        }
+
+        call_bundle();
+    }
+    else if(mode == PREPARE)
+    {
+        printf("[%s]Data prepare mode selected .\n", __FUNCTION__);
+
+        if(outseq == 0 || ctg_file == NULL)
+        {
+            usage();
+            exit(1);
+        }
+
+        data_prepare();
+    }
+    else if(mode == POTENT)
+    {
+        printf("[%s]Potential analysis mode selected .\n", __FUNCTION__) ;
+
+        //outseq is an int flag, so compare against 0 rather than NULL
+        if(outseq == 0)
+        {
+            usage();
+            exit(1);
+        }
+
+        //NOTE(review): no prototype for potential() is visible in this file;
+        //presumably it comes from extfunc.h - confirm.
+        potential();
+    }
+
+    return 0;
+}
+
+//Print the command-line help text to stdout.
+static void usage()
+{
+    static const char *const helpText[] =
+    {
+        "parameters:\n",
+        "global:\n",
+        "-s\tLibrary file.\n",
+        "-g\tPrefix of input files.\n",
+        "-p\tThreads.\n\n",
+        "Data prepare mode:\n",
+        "-D\tEnable this mode.\n",
+        "-K\tKmer.\n",
+        "-c\tInput contig file.(can't be name prefix.contig)\n\n",
+        "Mapping mode:\n",
+        "-M\tEnable this mode.\n\n",
+        "Bundling mode.\n",
+        "-B\tEnable this mode.\n",
+        "-u\tWeight threshold for outputting bundle file.(default 3)\n\n",
+        "Potential analysis mode.\n",
+        "-O\tEnable this mode.\n",
+        "Scaffolding mode:\n",
+        "-S\tEnable this mode.\n",
+        "-L\tthreshold for minimum length of contig(default K+2).\n",
+        "-P\tOverlap percent threshold for a subgraph(default 0.075).\n",
+        "-t\tOverlap percent threshold for a PE(default 0.2).\n",
+        "-i\tOverlap length threshold for remove transitive connect(default 20).\n"
+    };
+    size_t line;
+
+    for(line = 0; line < sizeof(helpText) / sizeof(helpText[0]); line++)
+        fputs(helpText[line], stdout);
+}
diff --git a/fusion/map.c b/fusion/map.c
new file mode 100644
index 0000000..22bf1ed
--- /dev/null
+++ b/fusion/map.c
@@ -0,0 +1,42 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+//static void initenv(int argc, char **argv);
+
+
+static void display_map_usage();
+
+//Mapping mode driver: load the contigs as kmer nodes, map the long reads and
+//then the short reads onto them, release the kmer sets, and report the elapsed
+//wall-clock time in minutes. Sets ctg_short to overlaplen + 2 before loading.
+//Always returns 0.
+int call_align()
+{
+    time_t start_t, stop_t;
+
+    time(&start_t);
+
+    ctg_short = overlaplen + 2;
+    prlContig2nodes(graphfile, ctg_short);
+
+    //map read to edge one by one
+    prlLongRead2Ctg(shortrdsfile, graphfile);
+    prlRead2Ctg(shortrdsfile, graphfile);
+
+    free_Sets(KmerSets, thrd_num);
+
+    time(&stop_t);
+    printf("[%s]total time on mapping reads to contig :%dm\n", __FUNCTION__, (int)(stop_t - start_t) / 60);
+    return 0;
+}
diff --git a/fusion/mem_manager.c b/fusion/mem_manager.c
new file mode 100644
index 0000000..d18b3cb
--- /dev/null
+++ b/fusion/mem_manager.c
@@ -0,0 +1,95 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+/*
+ * Create an empty pool manager that hands out fixed-size items.
+ *
+ * num_items - number of items carved out of every block
+ * unit_size - size in bytes of one item
+ *
+ * The manager starts with no blocks and an empty recycle list; blocks are
+ * allocated on demand by getItem().  Release it with freeMem_manager().
+ */
+MEM_MANAGER *createMem_manager(int num_items, size_t unit_size)
+{
+    MEM_MANAGER *pool = (MEM_MANAGER *)ckalloc(1 * sizeof(MEM_MANAGER));
+
+    pool->items_per_block = num_items;
+    pool->item_size = unit_size;
+    pool->counter = 0;
+    pool->block_list = NULL;
+    pool->recycle_list = NULL;
+
+    return pool;
+}
+
+/*
+ * Release every block owned by the manager and then the manager itself.
+ * A NULL manager is ignored.  Items previously handed out by getItem()
+ * live inside the blocks and become invalid.
+ */
+void freeMem_manager(MEM_MANAGER *mem_Manager)
+{
+    BLOCK_START *current, *following;
+
+    if (mem_Manager == NULL)
+    {
+        return;
+    }
+
+    for (current = mem_Manager->block_list; current != NULL; current = following)
+    {
+        /* fetch the successor before the block is released */
+        following = current->next;
+        free((void *)current);
+    }
+
+    free((void *)mem_Manager);
+}
+
+/*
+ * Hand out one item from the pool.
+ *
+ * Recycled items (see returnItem()) are reused first.  Otherwise the next
+ * free slot of the newest block is returned, allocating a fresh block when
+ * there is none yet or the newest one is full.  `counter` counts the items
+ * carved out of blocks (recycled hand-outs are not counted).
+ *
+ * Returns NULL only when mem_Manager is NULL.
+ */
+void *getItem(MEM_MANAGER *mem_Manager)
+{
+    RECYCLE_MARK *mark; //this is the type of return value
+    BLOCK_START *block;
+    char *first_item;
+
+    if(!mem_Manager)
+        return NULL;
+
+    if(mem_Manager->recycle_list)
+    {
+        mark = mem_Manager->recycle_list;
+        mem_Manager->recycle_list = mark->next;
+        return mark;
+    }
+
+    mem_Manager->counter++;
+
+    if(!mem_Manager->block_list || mem_Manager->index_in_block == mem_Manager->items_per_block)
+    {
+        block = ckalloc(sizeof(BLOCK_START) + mem_Manager->items_per_block * mem_Manager->item_size);
+        block->next = mem_Manager->block_list;
+        mem_Manager->block_list = block;
+        mem_Manager->index_in_block = 0;
+    }
+
+    // Items are laid out right behind the block header.  The offset is computed
+    // through char * because arithmetic on void * is not ISO C.
+    first_item = (char *)mem_Manager->block_list + sizeof(BLOCK_START);
+    return (RECYCLE_MARK *)(first_item + mem_Manager->item_size * (mem_Manager->index_in_block++));
+}
+
+/*
+ * Give an item back to the pool so that a later getItem() can reuse it.
+ * The item's own storage is reused to hold the recycle-list link, so its
+ * previous contents are lost.  NULL arguments are ignored, matching the
+ * NULL handling of getItem() and freeMem_manager().
+ */
+void returnItem(MEM_MANAGER *mem_Manager, void *item)
+{
+    RECYCLE_MARK *mark;
+
+    if(!mem_Manager || !item)
+        return;
+
+    mark = item;
+
+    // push onto the head of the recycle list
+    mark->next = mem_Manager->recycle_list;
+    mem_Manager->recycle_list = mark;
+}
+
+/*
+void test_mem_manager()
+{
+ MEM_MANAGER *test_manager;
+ NODE *temp_node;
+
+ test_manager = createMem_manager(NODEBLOCKSIZE,sizeof(NODE));
+ temp_node = (NODE *)getItem(test_manager);
+ returnItem(test_manager,temp_node);
+
+ freeMem_manager(test_manager);
+}
+*/
+
diff --git a/fusion/newhash.c b/fusion/newhash.c
new file mode 100644
index 0000000..f1edd14
--- /dev/null
+++ b/fusion/newhash.c
@@ -0,0 +1,630 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+#define PUBLIC_FUNC
+#define PROTECTED_FUNC
+
+// Template copied into a slot by set_new_kmer() before the sequence and link
+// coverages are filled in.
+// NOTE(review): positional initializer; the lone 1 presumably initialises the
+// `single` flag that put_kmerset() clears on a repeated k-mer -- confirm against
+// the field order of kmer_t in newhash.h.
+static const kmer_t empty_kmer = {0, 0, 0, 0, 0, 0, 1, 0, 0};
+
+/*
+ * Record one more observation of an existing k-mer.
+ *
+ * left / right are the neighbouring bases (0..3); any value >= 4 means
+ * "no neighbour on that side" and leaves that side untouched.  Each link
+ * coverage is incremented by one but never beyond MAX_KMER_COV.
+ */
+static inline void update_kmer(kmer_t *mer, ubyte left, ubyte right)
+{
+    if (left < 4)
+    {
+        const ubyte4 left_cov = get_kmer_left_cov(*mer, left);
+
+        if (left_cov < MAX_KMER_COV)
+        {
+            set_kmer_left_cov(*mer, left, left_cov + 1);
+        }
+    }
+
+    if (right < 4)
+    {
+        const ubyte4 right_cov = get_kmer_right_cov(*mer, right);
+
+        if (right_cov < MAX_KMER_COV)
+        {
+            set_kmer_right_cov(*mer, right, right_cov + 1);
+        }
+    }
+}
+
+/*
+ * Initialise a table slot for a k-mer seen for the first time: reset it to
+ * the empty template, store the sequence and give each present neighbour
+ * base (value < 4) a link coverage of 1.
+ */
+static inline void set_new_kmer(kmer_t *mer, ubyte8 seq, ubyte left, ubyte right)
+{
+    *mer = empty_kmer;
+    set_kmer_seq(*mer, seq);
+
+    if (left < 4)
+    {
+        set_kmer_left_cov(*mer, left, 1);
+    }
+
+    if (right < 4)
+    {
+        set_kmer_right_cov(*mer, right, 1);
+    }
+}
+
+
+/*
+ * Primality test by trial division.
+ *
+ * Returns 1 when num is prime, 0 otherwise.  The divisor bound is expressed
+ * as i <= num / i, which includes the square root itself (so 9, 25, 49 ...
+ * are correctly rejected) and cannot overflow or suffer from the rounding
+ * of a single-precision sqrt().
+ */
+static inline int is_prime_kh(ubyte8 num)
+{
+    ubyte8 i;
+
+    if(num < 2) return 0;
+
+    if(num < 4) return 1;
+
+    if(num % 2 == 0) return 0;
+
+    for(i = 3; i <= num / i; i += 2)
+    {
+        if(num % i == 0) return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Return the first value accepted by is_prime_kh() at or above num,
+ * examining odd candidates only (an even num is bumped to num + 1 first).
+ */
+static inline ubyte8 find_next_prime_kh(ubyte8 num)
+{
+    ubyte8 candidate = num;
+
+    if ((candidate & 1) == 0)
+    {
+        candidate++;
+    }
+
+    while (!is_prime_kh(candidate))
+    {
+        candidate += 2;
+    }
+
+    return candidate;
+}
+
+/*
+ * Create an empty open-addressing k-mer hash set.
+ *
+ * init_size   - requested number of slots; raised to at least 3 and rounded
+ *               up by find_next_prime_kh()
+ * load_factor - fill ratio that triggers growth; values <= 0 are replaced by
+ *               0.25 and values >= 1 by 0.75
+ *
+ * The growth threshold (max) is derived from the clamped load factor.
+ * Aborts with an out-of-memory message if an allocation fails, like
+ * encap_kmerset() does.  Release the set with free_kmerset().
+ */
+PUBLIC_FUNC KmerSet *init_kmerset(ubyte8 init_size, float load_factor)
+{
+    KmerSet *set;
+    size_t flag_bytes;
+
+    if(init_size < 3) init_size = 3;
+    else init_size = find_next_prime_kh(init_size);
+
+    // clamp first so that both load_factor and max use the sanitised value
+    if(load_factor <= 0) load_factor = 0.25f;
+    else if(load_factor >= 1) load_factor = 0.75f;
+
+    set = (KmerSet *)malloc(sizeof(KmerSet));
+
+    if(set == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    set->size = init_size;
+    set->count = 0;
+
+    set->searchCnt = 0;
+    set->foundCnt = 0;
+    set->delCnt = 0;
+    set->searchSpcSeedCnt = 0;
+    set->getSpcSeedCnt = 0;
+    set->levelGet[0] = 0;
+    set->levelGet[1] = 0;
+    set->levelGet[2] = 0;
+
+    set->load_factor = load_factor;
+    set->max = set->size * load_factor;
+    set->iter_ptr = 0;
+
+    // 4 bytes of flags per 16 slots; every byte starts as 0x55
+    flag_bytes = (set->size + 15) / 16 * 4;
+    set->array = calloc(set->size, sizeof(kmer_t));
+    set->flags = malloc(flag_bytes);
+
+    if(set->array == NULL || set->flags == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    memset(set->flags, 0x55, flag_bytes);
+    return set;
+}
+
+/*
+ * Linear-probe for seq, starting at seq % size and wrapping at the end of
+ * the table.  Returns the index of the slot holding seq, or of the first
+ * null slot met on the probe path when seq is absent.
+ */
+PROTECTED_FUNC static inline ubyte8 get_kmerset(KmerSet *set, ubyte8 seq)
+{
+    ubyte8 slot = seq % set->size;
+
+    while (!is_kmer_entity_null(set->flags, slot) && get_kmer_seq(set->array[slot]) != seq)
+    {
+        slot++;
+
+        if (slot == set->size)
+        {
+            slot = 0;
+        }
+    }
+
+    return slot;
+}
+
+/*
+ * Look up seq in the set.
+ *
+ * Returns 1 and stores the address of the matching entry in *rs when the
+ * k-mer is present; returns 0 (leaving *rs untouched) when the probe reaches
+ * a null slot first.
+ */
+PUBLIC_FUNC int search_kmerset(KmerSet *set, ubyte8 seq, kmer_t **rs)
+{
+    ubyte8 slot = seq % set->size;
+
+    while (!is_kmer_entity_null(set->flags, slot))
+    {
+        if (get_kmer_seq(set->array[slot]) == seq)
+        {
+            *rs = set->array + slot;
+            return 1;
+        }
+
+        slot++;
+
+        if (slot == set->size)
+        {
+            slot = 0;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Membership test: 1 when the slot located by get_kmerset() is occupied
+ * (i.e. seq is stored), 0 when the probe ended on a null slot.
+ */
+PUBLIC_FUNC static inline int exists_kmerset(KmerSet *set, ubyte8 seq)
+{
+    const ubyte8 slot = get_kmerset(set, seq);
+
+    if (is_kmer_entity_null(set->flags, slot))
+    {
+        return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * Make room for `num` additional entries.
+ *
+ * Returns immediately while set->count + num stays within set->max.
+ * Otherwise the slot array is enlarged with realloc() and every stored
+ * k-mer is rehashed in place into the larger table, using a fresh flag
+ * array for the new layout and the old flag array to track which old
+ * entries still have to be moved.  Aborts if the array cannot be grown.
+ */
+PROTECTED_FUNC static inline void encap_kmerset(KmerSet *set, ubyte8 num)
+{
+ ubyte4 *flags, *f;
+ ubyte8 i, n, size, hc;
+ kmer_t key, tmp;
+
+ if(set->count + num <= set->max) return;
+
+ n = set->size;
+
+ // Enlarge the capacity (doubling below 0xFFFFFFF, then in steps of 0xFFFFFF),
+ // rounding to the next prime each time, until the load limit covers count + num.
+ do
+ {
+ if(n < 0xFFFFFFFU)
+ n <<= 1;
+ else
+ n += 0xFFFFFFU;
+
+ n = find_next_prime_kh(n);
+ }
+ while(n * set->load_factor < set->count + num);
+
+ // NOTE(review): the realloc result overwrites set->array directly; this is only
+ // acceptable because the failure path aborts right below.
+ set->array = realloc(set->array, n * sizeof(kmer_t));
+
+ if(set->array == NULL)
+ {
+ fprintf(stderr, "-- Out of memory --\n");
+ abort();
+ }
+
+ // NOTE(review): this malloc is not checked before memset.
+ flags = malloc((n + 15) / 16 * 4);
+ memset(flags, 0x55, (n + 15) / 16 * 4);
+ size = set->size;
+ set->size = n;
+ set->max = n * set->load_factor;
+ // From here on set->flags describes the NEW layout, while the local `flags`
+ // holds the OLD flag array (old size = `size`).
+ f = set->flags;
+ set->flags = flags;
+ flags = f;
+
+ for(i = 0; i < size; i++)
+ {
+ if(!exists_kmer_entity(flags, i)) continue;
+
+ // Lift the entry out of its old slot and mark that slot as handled.
+ key = set->array[i];
+ set_kmer_entity_del(flags, i);
+
+ while(1)
+ {
+ // Probe the new layout for a free slot for `key`.
+ hc = get_kmer_seq(key) % set->size;
+
+ while(!is_kmer_entity_null(set->flags, hc))
+ {
+ hc ++;
+
+ if(hc == set->size) hc = 0;
+ }
+
+ clear_kmer_entity_null(set->flags, hc);
+
+ // The chosen slot may still hold an old entry that has not been moved yet:
+ // swap it out, store `key` there and continue with the displaced entry.
+ if(hc < size && exists_kmer_entity(flags, hc))
+ {
+ tmp = key;
+ key = set->array[hc];
+ set->array[hc] = tmp;
+ set_kmer_entity_del(flags, hc);
+ }
+ else
+ {
+ set->array[hc] = key;
+ break;
+ }
+ }
+ }
+
+ // Release the old flag array.
+ free(flags);
+}
+
+/*
+ * Insert a k-mer observation.
+ *
+ * Grows the table if needed, then probes linearly from seq % size.
+ *   - seq absent : a new entry is created in the first null slot, the entry
+ *                  count is incremented and 0 is returned;
+ *   - seq present: its link coverages are updated, its `single` flag is
+ *                  cleared and 1 is returned.
+ * In both cases *kmer_p receives the address of the entry.
+ */
+PUBLIC_FUNC int put_kmerset(KmerSet *set, ubyte8 seq, ubyte left, ubyte right, kmer_t **kmer_p)
+{
+    ubyte8 slot;
+
+    encap_kmerset(set, 1);
+    slot = seq % set->size;
+
+    for (;;)
+    {
+        if (is_kmer_entity_null(set->flags, slot))
+        {
+            clear_kmer_entity_null(set->flags, slot);
+            set_new_kmer(set->array + slot, seq, left, right);
+            set->count ++;
+            *kmer_p = set->array + slot;
+            return 0;
+        }
+
+        if (get_kmer_seq(set->array[slot]) == seq)
+        {
+            update_kmer(set->array + slot, left, right);
+            set->array[slot].single = 0;
+            *kmer_p = set->array + slot;
+            return 1;
+        }
+
+        slot++;
+
+        if (slot == set->size)
+        {
+            slot = 0;
+        }
+    }
+}
+
+/* Number of k-mers currently stored in the set. */
+PUBLIC_FUNC byte8 count_kmerset(KmerSet *set)
+{
+    const byte8 stored = set->count;
+
+    return stored;
+}
+
+/* Rewind the set's built-in iterator so iter_kmerset() starts at slot 0. */
+PUBLIC_FUNC static inline void reset_iter_kmerset(KmerSet *set)
+{
+    (*set).iter_ptr = 0;
+}
+
+/*
+ * Advance the set's built-in iterator to the next non-null slot.
+ *
+ * Returns 1 and stores the entry address in *rs when one is found (the
+ * iterator is left just past it); returns 0 once the end of the table has
+ * been reached.
+ */
+PUBLIC_FUNC static inline ubyte8 iter_kmerset(KmerSet *set, kmer_t **rs)
+{
+    for (; set->iter_ptr < set->size; set->iter_ptr ++)
+    {
+        if (is_kmer_entity_null(set->flags, set->iter_ptr))
+        {
+            continue;
+        }
+
+        *rs = set->array + set->iter_ptr;
+        set->iter_ptr ++;
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Release a set created by init_kmerset(): its slot array, its flag array
+ * and the set structure itself.  A NULL set is ignored.
+ */
+PUBLIC_FUNC void free_kmerset(KmerSet *set)
+{
+    if(!set)
+        return;
+
+    free(set->array);
+    free(set->flags);
+    free(set);
+}
+
+/*
+ * Release an array of `num` k-mer sets: each set via free_kmerset(), then
+ * the pointer array itself.
+ */
+PUBLIC_FUNC void free_Sets(KmerSet **sets, int num)
+{
+    int idx = 0;
+
+    while (idx < num)
+    {
+        free_kmerset(sets[idx]);
+        idx++;
+    }
+
+    free((void *)sets);
+}
+
+/* Count how many of the four possible left (previous) bases have a non-zero link coverage. */
+int count_branch2prev(kmer_t *node)
+{
+    int base;
+    int branches = 0;
+
+    for (base = 0; base < 4; base++)
+    {
+        branches += (get_kmer_left_cov(*node, base) > 0) ? 1 : 0;
+    }
+
+    return branches;
+}
+
+/* Count how many of the four possible right (next) bases have a non-zero link coverage. */
+int count_branch2next(kmer_t *node)
+{
+    int base;
+    int branches = 0;
+
+    for (base = 0; base < 4; base++)
+    {
+        branches += (get_kmer_right_cov(*node, base) > 0) ? 1 : 0;
+    }
+
+    return branches;
+}
+
+/*
+ * Remove the link towards the previous base `ch`.
+ * When `smaller` is set the left coverage of `ch` is zeroed; otherwise the
+ * right coverage of the complement base int_comp(ch) is zeroed.
+ */
+void dislink2prevUncertain(kmer_t *node, char ch, boolean smaller)
+{
+    if (!smaller)
+    {
+        set_kmer_right_cov(*node, int_comp(ch), 0);
+        return;
+    }
+
+    set_kmer_left_cov(*node, ch, 0);
+}
+
+/*
+ * Remove the link towards the next base `ch`.
+ * When `smaller` is set the right coverage of `ch` is zeroed; otherwise the
+ * left coverage of the complement base int_comp(ch) is zeroed.
+ */
+void dislink2nextUncertain(kmer_t *node, char ch, boolean smaller)
+{
+    if (!smaller)
+    {
+        set_kmer_left_cov(*node, int_comp(ch), 0);
+        return;
+    }
+
+    set_kmer_right_cov(*node, ch, 0);
+}
+
+
+
+
+
+
+////////////////// functions for spaced seed Kmer hash
+
+// Template copied into a slot by set_new_spckmer() before the seed sequence and
+// the first spaced-base node are filled in.
+// NOTE(review): positional initializer; the trailing 1 presumably initialises
+// spaced_base_num (update_spckmer() increments it per extra node) -- confirm
+// against the field order of spcKmer.
+static const spcKmer empty_spckmer = {0, NULL, 1};
+
+/*
+ * Attach one more (spaced bases, large k-mer) pair to an existing spaced
+ * seed entry.
+ *
+ * The new node is linked in right behind the first node of mer's list; if
+ * the list is unexpectedly empty it becomes the first node.  The entry's
+ * spaced_base_num is incremented.  Aborts with an out-of-memory message
+ * when the node cannot be allocated.
+ *
+ * Always returns 0.
+ */
+static inline int update_spckmer(spcKmer *mer, ubyte2 s_bases, kmer_t *node)
+{
+    spcBase *head = mer->start;
+    spcBase *newSpcBase = (spcBase *)malloc(sizeof(spcBase));
+
+    if(newSpcBase == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    newSpcBase->spaced_bases = s_bases;
+    newSpcBase->large_kmer = node;
+
+    if(head == NULL)
+    {
+        // defensive: an entry without a first node would otherwise be dereferenced
+        newSpcBase->next = NULL;
+        mer->start = newSpcBase;
+    }
+    else
+    {
+        newSpcBase->next = head->next;
+        head->next = newSpcBase;
+    }
+
+    mer->spaced_base_num++;
+
+    return 0;
+}
+
+/*
+ * Initialise a table slot for a spaced seed seen for the first time: reset
+ * it to the empty template, store the seed sequence and start its node list
+ * with a single (spaced bases, large k-mer) node.  Aborts with an
+ * out-of-memory message when the node cannot be allocated.
+ */
+static inline void set_new_spckmer(spcKmer *mer, Kmer spc_kmer, ubyte2 s_bases, kmer_t *node)
+{
+    spcBase *newSpcBase = (spcBase *)malloc(sizeof(spcBase));
+
+    if(newSpcBase == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    *mer = empty_spckmer;
+    set_kmer_seq(*mer, spc_kmer);
+
+    newSpcBase->spaced_bases = s_bases;
+    newSpcBase->large_kmer = node;
+    newSpcBase->next = NULL;
+
+    mer->start = newSpcBase;
+}
+
+/*
+ * Create an empty open-addressing hash set for spaced seeds.
+ *
+ * init_size   - requested number of slots; raised to at least 3 and rounded
+ *               up by find_next_prime_kh()
+ * load_factor - fill ratio that triggers growth; values <= 0 are replaced by
+ *               0.25 and values >= 1 by 0.75
+ *
+ * The growth threshold (max) is derived from the clamped load factor.
+ * Aborts with an out-of-memory message if an allocation fails, like
+ * encap_spckmerset() does.
+ */
+PUBLIC_FUNC spcKmerSet *init_spckmerset(ubyte8 init_size, float load_factor)
+{
+    spcKmerSet *set;
+    size_t flag_bytes;
+
+    if(init_size < 3) init_size = 3;
+    else init_size = find_next_prime_kh(init_size);
+
+    // clamp first so that both load_factor and max use the sanitised value
+    if(load_factor <= 0) load_factor = 0.25f;
+    else if(load_factor >= 1) load_factor = 0.75f;
+
+    set = (spcKmerSet *)malloc(sizeof(spcKmerSet));
+
+    if(set == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    set->size = init_size;
+    set->count = 0;
+    set->load_factor = load_factor;
+    set->max = set->size * load_factor;
+
+    // 4 bytes of flags per 16 slots; every byte starts as 0x55
+    flag_bytes = (set->size + 15) / 16 * 4;
+    set->array = calloc(set->size, sizeof(spcKmer));
+    set->flags = malloc(flag_bytes);
+
+    if(set->array == NULL || set->flags == NULL)
+    {
+        fprintf(stderr, "-- Out of memory --\n");
+        abort();
+    }
+
+    memset(set->flags, 0x55, flag_bytes);
+    return set;
+}
+
+/*
+ * Look up a spaced seed.
+ *
+ * Returns 1 and stores the address of the matching entry in *rs when the
+ * seed is present; returns 0 (leaving *rs untouched) when the linear probe
+ * reaches a null slot first.
+ */
+PUBLIC_FUNC int search_spckmerset(spcKmerSet *set, ubyte8 seq, spcKmer **rs)
+{
+    ubyte8 slot = seq % set->size;
+
+    while (!is_kmer_entity_null(set->flags, slot))
+    {
+        if (get_kmer_seq(set->array[slot]) == seq)
+        {
+            *rs = set->array + slot;
+            return 1;
+        }
+
+        slot++;
+
+        if (slot == set->size)
+        {
+            slot = 0;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Make room for `num` additional spaced-seed entries.
+ *
+ * Returns immediately while set->count + num stays within set->max.
+ * Otherwise the slot array is enlarged with realloc() and every stored
+ * entry is rehashed in place into the larger table, using a fresh flag
+ * array for the new layout and the old flag array to track which old
+ * entries still have to be moved.  Aborts if the array cannot be grown.
+ */
+PROTECTED_FUNC static inline void encap_spckmerset(spcKmerSet *set, ubyte8 num)
+{
+ ubyte4 *flags, *f;
+ ubyte8 i, n, size, hc;
+ spcKmer key, tmp;
+
+ if(set->count + num <= set->max) return;
+
+ n = set->size;
+
+ // Enlarge the capacity (doubling below 0xFFFFFFF, then in steps of 0xFFFFFF),
+ // rounding to the next prime each time, until the load limit covers count + num.
+ do
+ {
+ if(n < 0xFFFFFFFU)
+ n <<= 1;
+ else
+ n += 0xFFFFFFU;
+
+ n = find_next_prime_kh(n);
+ }
+ while(n * set->load_factor < set->count + num);
+
+ // NOTE(review): the realloc result overwrites set->array directly; this is only
+ // acceptable because the failure path aborts right below.
+ set->array = realloc(set->array, n * sizeof(spcKmer));
+
+ if(set->array == NULL)
+ {
+ fprintf(stderr, "-- Out of memory --\n");
+ abort();
+ }
+
+ // NOTE(review): this malloc is not checked before memset.
+ flags = malloc((n + 15) / 16 * 4);
+ memset(flags, 0x55, (n + 15) / 16 * 4);
+ size = set->size;
+ set->size = n;
+ set->max = n * set->load_factor;
+ // From here on set->flags describes the NEW layout, while the local `flags`
+ // holds the OLD flag array (old size = `size`).
+ f = set->flags;
+ set->flags = flags;
+ flags = f;
+
+ for(i = 0; i < size; i++)
+ {
+ if(!exists_kmer_entity(flags, i)) continue;
+
+ // Lift the entry out of its old slot and mark that slot as handled.
+ key = set->array[i];
+ set_kmer_entity_del(flags, i);
+
+ while(1)
+ {
+ // Probe the new layout for a free slot for `key`.
+ hc = get_kmer_seq(key) % set->size;
+
+ while(!is_kmer_entity_null(set->flags, hc))
+ {
+ hc ++;
+
+ if(hc == set->size) hc = 0;
+ }
+
+ clear_kmer_entity_null(set->flags, hc);
+
+ // The chosen slot may still hold an old entry that has not been moved yet:
+ // swap it out, store `key` there and continue with the displaced entry.
+ if(hc < size && exists_kmer_entity(flags, hc))
+ {
+ tmp = key;
+ key = set->array[hc];
+ set->array[hc] = tmp;
+ set_kmer_entity_del(flags, hc);
+ }
+ else
+ {
+ set->array[hc] = key;
+ break;
+ }
+ }
+ }
+
+ // Release the old flag array.
+ free(flags);
+}
+
+/*
+ * Insert a (spaced seed, spaced bases, large k-mer) triple.
+ *
+ * Grows the table if needed, then probes linearly from spc_kmer % size.
+ * A seed that is not stored yet gets a new entry (count is incremented,
+ * 0 is returned); for a stored seed the pair is appended through
+ * update_spckmer() and its result is returned.
+ */
+PUBLIC_FUNC int put_spckmerset(spcKmerSet *set, Kmer spc_kmer, ubyte2 spaced_bases, kmer_t *node)
+{
+    ubyte8 slot;
+
+    encap_spckmerset(set, 1);
+    slot = spc_kmer % set->size;
+
+    for (;;)
+    {
+        if (is_kmer_entity_null(set->flags, slot))
+        {
+            /* unseen seed: claim the slot */
+            clear_kmer_entity_null(set->flags, slot);
+            set_new_spckmer(set->array + slot, spc_kmer, spaced_bases, node);
+            set->count ++;
+            return 0;
+        }
+
+        if (get_kmer_seq(set->array[slot]) == spc_kmer)
+        {
+            /* known seed: append to its node list */
+            return update_spckmer(set->array + slot, spaced_bases, node);
+        }
+
+        slot++;
+
+        if (slot == set->size)
+        {
+            slot = 0;
+        }
+    }
+}
+
+/*
+ * Populate the spaced-seed set from a k-mer set.
+ *
+ * Every occupied slot of `set` whose `deleted` field is not 1 is split with
+ * fixed bit masks into a spaced seed (spc_kmer) and the skipped bases
+ * (spc_bases), and the triple (seed, bases, pointer to the k-mer entry) is
+ * stored through put_spckmerset().  A non-zero status from put_spckmerset()
+ * is reported on stderr.
+ */
+PUBLIC_FUNC void buildSpcKmerSet(KmerSet *set, spcKmerSet *spaced_kset)
+{
+    int spcFlag;
+    Kmer buff_kmer, spc_kmer;
+    ubyte2 spc_bases;
+    ubyte8 i;
+
+    for(i = 0; i < set->size; i++)
+    {
+        if(is_kmer_entity_null(set->flags, i))
+            continue;
+
+        if(set->array[i].deleted == 1) //repeat kmer, skipped
+            continue;
+
+        //spaced seed: 18 of 25, build masker and use >>,&,| for each part, only assign once
+        // 11 11111111 11111111 11111111 11001100 11110000 11000000
+
+        buff_kmer = get_kmer_seq(set->array[i]);
+
+        spc_kmer = ((buff_kmer >> 14) & 0xFFFFFFF00) | ((buff_kmer >> 12) & 0xC0) | ((buff_kmer >> 10) & 0x3C) | ((buff_kmer >> 6) & 0x3);
+        //0xFFFFFFF00 = 1111 11111111 11111111 11111111 00000000
+        //       0xC0 = 0000 00000000 00000000 00000000 11000000
+        //       0x3C = 0000 00000000 00000000 00000000 00111100
+        //        0x3 = 0000 00000000 00000000 00000000 00000011
+
+        spc_bases = ((buff_kmer >> 8) & 0x3000) | ((buff_kmer >> 6) & 0xC00) | ((buff_kmer >> 2) & 0x3C0) | (buff_kmer & 0x3F);
+        // 0x3000 = 110000 00000000
+        //  0xC00 = 001100 00000000
+        //  0x3C0 = 000011 11000000
+        //   0x3F = 000000 00111111
+
+        //build the 18mer and the spaced bases(7mer), put them in the spaced_kmer hash
+        spcFlag = put_spckmerset(spaced_kset, spc_kmer, spc_bases, set->array + i);
+
+        // the status is an integer code, so it is printed with %d (not %c)
+        if(spcFlag != 0)
+            fprintf(stderr, "flag error: %d\tkmer exists: %llu %hu\n", spcFlag, (unsigned long long)spc_kmer, spc_bases);
+    }
+}
diff --git a/fusion/orderContig.c b/fusion/orderContig.c
new file mode 100644
index 0000000..b29a597
--- /dev/null
+++ b/fusion/orderContig.c
@@ -0,0 +1,4426 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+#include "dfibHeap.h"
+#include "fibHeap.h"
+#include "darray.h"
+
+#define CNBLOCKSIZE 10000
+#define MAXC 10000
+#define MAXCinBetween 200
+
+#define MaxNodeInSub 10000
+#define GapLowerBound -2000
+#define GapUpperBound 300000
+
+//static boolean static_f=0;
+
+
+static int gapCounter;
+static int orienCounter;
+static int throughCounter;
+
+static DARRAY *solidArray;
+static DARRAY *tempArray;
+
+static int solidCounter;
+
+static CTGinHEAP ctg4heapArray[MaxNodeInSub + 1]; // index in this array are put to heaps, start from 1
+static unsigned int nodesInSub[MaxNodeInSub];
+static int nodeDistance[MaxNodeInSub];
+static int nodeCounter;
+
+static unsigned int nodesInSubInOrder[MaxNodeInSub];
+static int nodeDistanceInOrder[MaxNodeInSub];
+
+static DARRAY *scaf3, *scaf5;
+static DARRAY *gap3, *gap5;
+
+static unsigned int downstreamCTG[MAXCinBetween];
+static unsigned int upstreamCTG[MAXCinBetween];
+static int dsCtgCounter;
+static int usCtgCounter;
+
+static CONNECT *checkConnect(unsigned int from_c, unsigned int to_c);
+static int maskPuzzle(int num_connect, unsigned int contigLen);
+static void freezing();
+static boolean checkOverlapInBetween(double tolerance);
+static int setConnectDelete(unsigned int from_c, unsigned int to_c, char flag, boolean cleanBinding);
+static int setConnectWP(unsigned int from_c, unsigned int to_c, char flag);
+
+static void general_linearization(boolean strict);
+static void debugging2();
+static void smallScaf();
+static void detectBreakScaf();
+static boolean checkSimple(DARRAY *ctgArray, int count);
+static void checkCircle();
+
+// Find the single downward connection of `ctg` that takes part in a scaffold
+// binding.  Candidates are ranked: connections with nextInScaf first, then
+// (only if there is none with nextInScaf) connections with prevInScaf, then
+// (only if there is neither) connections flagged singleInScaf.  A rank yields
+// a result only when exactly one connection has it; otherwise NULL is returned.
+static CONNECT *getBindCnt(unsigned int ctg)
+{
+    CONNECT *cur;
+    CONNECT *withNext = NULL;
+    CONNECT *withPrev = NULL;
+    CONNECT *withSingle = NULL;
+    int nNext = 0;
+    int nPrev = 0;
+    int nSingle = 0;
+
+    for (cur = contig_array[ctg].downwardConnect; cur != NULL; cur = cur->next)
+    {
+        if (cur->nextInScaf)
+        {
+            nNext++;
+            withNext = cur;
+        }
+
+        if (cur->prevInScaf)
+        {
+            withPrev = cur;
+            nPrev++;
+        }
+
+        if (cur->singleInScaf)
+        {
+            withSingle = cur;
+            nSingle++;
+        }
+    }
+
+    if (nNext == 1)
+        return withNext;
+
+    if (nNext != 0)
+        return NULL;
+
+    if (nPrev == 1)
+        return withPrev;
+
+    if (nPrev != 0)
+        return NULL;
+
+    return (nSingle == 1) ? withSingle : NULL;
+}
+
+/*
+ * Replace the connection `originCnt` (sourceStart -> originCnt->contigID)
+ * by an equivalent one between targetStart and targetStop.
+ *
+ * The original connection and, when present, its twin are marked deleted.
+ * If `gap` is below GapLowerBound nothing is created and gapCounter is
+ * incremented.  Otherwise a connection targetStart -> targetStop and its
+ * twin are added with the original weight and flagged as inherited.
+ */
+static void createAnalogousCnt(unsigned int sourceStart,
+                               CONNECT *originCnt, int gap,
+                               unsigned int targetStart, unsigned int targetStop)
+{
+    CONNECT *temp_cnt;
+    unsigned int balTargetStart = getTwinCtg(targetStart);
+    unsigned int balTargetStop = getTwinCtg(targetStop);
+
+    unsigned int balSourceStart = getTwinCtg(sourceStart);
+    unsigned int balSourceStop = getTwinCtg(originCnt->contigID);
+
+    originCnt->deleted = 1;
+    temp_cnt = getCntBetween(balSourceStop, balSourceStart);
+
+    // getCntBetween() can return NULL (other callers in this file check for it)
+    if(temp_cnt)
+        temp_cnt->deleted = 1;
+
+    if(gap < GapLowerBound)
+    {
+        gapCounter++;
+        return;
+    }
+
+    temp_cnt = add1Connect(targetStart, targetStop, gap, originCnt->weight, 1);
+
+    if(temp_cnt)
+        temp_cnt->inherit = 1;
+
+    temp_cnt = add1Connect(balTargetStop, balTargetStart, gap, originCnt->weight, 1);
+
+    if(temp_cnt)
+        temp_cnt->inherit = 1;
+}
+// Increase the long-PE support of every connection on the scaffold path
+// between fromCtg and toCtg.
+//
+// The support is accumulated in CONNECT.maxGap: `weight` is added to each
+// connection on the bound path fromCtg -> ... -> toCtg and on the twin path
+// twin(toCtg) -> ... -> twin(fromCtg), saturating at 1000.  Nothing is done
+// (a warning is printed) when the two contigs do not share from_vt/to_vt or
+// when fromCtg does not precede toCtg by indexInScaf.
+static void add1LongPEcov(unsigned int fromCtg, unsigned int toCtg, int weight)
+{
+    //check if they are on the same scaff
+    if(contig_array[fromCtg].from_vt != contig_array[toCtg].from_vt ||
+            contig_array[fromCtg].to_vt != contig_array[toCtg].to_vt)
+    {
+        printf("Warning from add1LongPEcov: contig %u and %u not on the same scaffold\n",
+               fromCtg, toCtg);
+        return;
+    }
+
+    if(contig_array[fromCtg].indexInScaf >= contig_array[toCtg].indexInScaf)
+    {
+        printf("Warning from add1LongPEcov: wrong about order between contig %u and %u\n",
+               fromCtg, toCtg);
+        return;
+    }
+
+    CONNECT *bindCnt;
+    unsigned int prevCtg = fromCtg;
+    bindCnt = getBindCnt(fromCtg);
+
+    // forward path: fromCtg -> ... -> toCtg
+    while(bindCnt)
+    {
+        if(bindCnt->maxGap + weight <= 1000)
+            bindCnt->maxGap += weight;
+        else
+            bindCnt->maxGap = 1000;
+
+        if(fromCtg == 0 && toCtg == 0)
+            printf("link (%u %u ) covered by link (%u %u), wt %d\n",
+                   prevCtg, bindCnt->contigID, fromCtg, toCtg, weight);
+
+        if(bindCnt->contigID == toCtg)
+            break;
+
+        prevCtg = bindCnt->contigID;
+        bindCnt = bindCnt->nextInScaf;
+    }
+
+    unsigned int bal_fc = getTwinCtg(fromCtg);
+    unsigned int bal_tc = getTwinCtg(toCtg);
+    bindCnt = getBindCnt(bal_tc);
+    prevCtg = bal_tc;
+
+    // twin path: twin(toCtg) -> ... -> twin(fromCtg)
+    while(bindCnt)
+    {
+        if(bindCnt->maxGap + weight <= 1000)
+            bindCnt->maxGap += weight;
+        else
+            bindCnt->maxGap = 1000;
+
+        if(fromCtg == 0 && toCtg == 0)
+            printf("link (%u %u ) covered by link (%u %u), wt %d\n",
+                   prevCtg, bindCnt->contigID, fromCtg, toCtg, weight);
+
+        if(bindCnt->contigID == bal_fc)
+            return;
+
+        prevCtg = bindCnt->contigID;
+        bindCnt = bindCnt->nextInScaf;
+    }
+
+    printf("Warning from add1LongPEcov: not reach the end (%u %u) (B)\n", bal_tc, bal_fc);
+}
+
+// For long pair ends: move each unbound connection along the scaffolds already
+// established by shorter pair ends until it links the ends of those scaffolds.
+//
+// For every unmasked contig i that is part of a binding, the last unmasked
+// contig downstream of i (bottomCtg) is located.  Each free connection
+// i -> targetCtg is then either deleted (both contigs already on the same
+// scaffold, or an orientation conflict) or re-created by createAnalogousCnt()
+// between bottomCtg and the top-most unmasked contig upstream of targetCtg
+// (topCtg), with the gap reduced by the two slid distances.
+// orienCounter and throughCounter count the deleted connections.
+static void downSlide()
+{
+    fprintf(stderr, "[%s]entering this function.\n", __FUNCTION__);
+    int len = 0, gap;
+    unsigned int i;
+    CONNECT *ite_cnt, *bindCnt, *temp_cnt;
+    unsigned int bottomCtg, topCtg, bal_i;
+    unsigned int targetCtg, bal_target;
+    boolean getThrough, orienConflict;
+    int slideLen, slideLen2;
+
+    orienCounter = throughCounter = 0;
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        if(contig_array[i].mask || !contig_array[i].downwardConnect)
+            continue;
+
+        bindCnt = getBindCnt(i);
+
+        if(!bindCnt)
+            continue;
+
+        bal_i = getTwinCtg(i);
+        len = slideLen = 0;
+        bottomCtg = i;
+
+        //find the last unmasked contig in this binding
+        while(bindCnt->nextInScaf)
+        {
+            len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
+
+            if(contig_array[bindCnt->contigID].mask == 0)
+            {
+                bottomCtg = bindCnt->contigID;
+                slideLen = len;
+            }
+
+            bindCnt = bindCnt->nextInScaf;
+        }
+
+        len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
+
+        if(contig_array[bindCnt->contigID].mask == 0 || bottomCtg == 0)
+        {
+            bottomCtg = bindCnt->contigID;
+            slideLen = len;
+        }
+
+        //check each connection from long pair ends
+        ite_cnt = contig_array[i].downwardConnect;
+
+        while(ite_cnt)
+        {
+            if(ite_cnt->deleted || ite_cnt->mask || ite_cnt->singleInScaf
+                    || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit)
+            {
+                ite_cnt = ite_cnt->next;
+                continue;
+            }
+
+            targetCtg = ite_cnt->contigID;
+            // Resolved here (not further down) because the conflict diagnostics
+            // below print contig_array[bal_target]; previously they read
+            // bal_target before it had been assigned for this connection.
+            bal_target = getTwinCtg(targetCtg);
+
+            if(contig_array[i].from_vt == contig_array[targetCtg].from_vt) // on the same scaff
+            {
+                if(contig_array[i].indexInScaf > contig_array[targetCtg].indexInScaf)
+                    orienCounter++;
+                else
+                    throughCounter++;
+
+                setConnectDelete(i, ite_cnt->contigID, 1, 0);
+                ite_cnt = ite_cnt->next;
+                continue;
+            }
+
+            //check if this connection conflicts with previous scaffold orientationally
+            temp_cnt = getBindCnt(targetCtg);
+            orienConflict = 0;
+
+            if(temp_cnt)
+            {
+                while(temp_cnt->nextInScaf)
+                {
+                    if(temp_cnt->contigID == i)
+                    {
+                        orienConflict = 1;
+                        printf("Warning from downSlide: still on the same scaff: %d and %d\n"
+                               , i, targetCtg);
+                        printf("on scaff %d and %d\n",
+                               contig_array[i].from_vt, contig_array[targetCtg].from_vt);
+                        printf("on bal_scaff %d and %d\n",
+                               contig_array[bal_target].to_vt, contig_array[bal_i].to_vt);
+                        break;
+                    }
+
+                    temp_cnt = temp_cnt->nextInScaf;
+                }
+
+                if(temp_cnt->contigID == i)
+                    orienConflict = 1;
+            }
+
+            if(orienConflict)
+            {
+                orienCounter++;
+                setConnectDelete(i, ite_cnt->contigID, 1, 0);
+                ite_cnt = ite_cnt->next;
+                continue;
+            }
+
+            //find the most top contig along previous scaffold starting with the target contig of this connection
+            slideLen2 = 0;
+
+            if(contig_array[targetCtg].mask == 0)
+            {
+                topCtg = bal_target;
+            }
+            else
+            {
+                topCtg = 0;
+            }
+
+            temp_cnt = getBindCnt(bal_target);
+            getThrough = len = 0;
+
+            if(temp_cnt)
+            {
+                //find the last contig in this binding
+                while(temp_cnt->nextInScaf)
+                {
+                    //check if this route reaches bal_i
+                    if(temp_cnt->contigID == bal_i)
+                    {
+                        printf("Warning from downSlide: (B) still on the same scaff: %d and %d (%d and %d)\n",
+                               i, targetCtg, bal_target, bal_i);
+                        printf("on scaff %d and %d\n",
+                               contig_array[i].from_vt, contig_array[targetCtg].from_vt);
+                        printf("on bal_scaff %d and %d\n",
+                               contig_array[bal_target].to_vt, contig_array[bal_i].to_vt);
+                        getThrough = 1;
+                        break;
+                    }
+
+                    len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
+
+                    if(contig_array[temp_cnt->contigID].mask == 0)
+                    {
+                        topCtg = temp_cnt->contigID;
+                        slideLen2 = len;
+                    }
+
+                    temp_cnt = temp_cnt->nextInScaf;
+                }
+
+                len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
+
+                if(contig_array[temp_cnt->contigID].mask == 0 || topCtg == 0)
+                {
+                    topCtg = temp_cnt->contigID;
+                    slideLen2 = len;
+                }
+
+                if(temp_cnt->contigID == bal_i)
+                    getThrough = 1;
+                else
+                    topCtg = getTwinCtg(topCtg);
+            }
+            else
+                topCtg = targetCtg;
+
+            if(getThrough)
+            {
+                throughCounter++;
+                setConnectDelete(i, ite_cnt->contigID, 1, 0);
+                ite_cnt = ite_cnt->next;
+                continue;
+            }
+
+            //add a connection between bottomCtg and topCtg
+            gap = ite_cnt->gapLen - slideLen - slideLen2;
+
+            if(bottomCtg != topCtg && !(i == bottomCtg && targetCtg == topCtg))
+            {
+                createAnalogousCnt(i, ite_cnt, gap, bottomCtg, topCtg);
+
+                if(contig_array[bottomCtg].mask || contig_array[topCtg].mask)
+                    printf("downSlide to masked contig\n");
+            }
+
+            ite_cnt = ite_cnt->next;
+        } //for each connect
+    } // for each contig
+}
+
+/*
+ * Set cnt->nextInScaf to nextCnt.
+ *
+ * Clearing the link (nextCnt == NULL) always succeeds.  Establishing a link
+ * is refused when either connection is masked or deleted.
+ * Returns 1 on success, 0 (after printing a diagnostic) otherwise,
+ * including when cnt itself is NULL.
+ */
+static boolean setNextInScaf(CONNECT *cnt, CONNECT *nextCnt)
+{
+    if (cnt == NULL)
+    {
+        printf("setNextInScaf: empty pointer\n");
+        return 0;
+    }
+
+    if (nextCnt != NULL)
+    {
+        if (cnt->mask || cnt->deleted)
+        {
+            printf("setNextInScaf: cnt is masked or deleted\n");
+            return 0;
+        }
+
+        if (nextCnt->deleted || nextCnt->mask)
+        {
+            printf("setNextInScaf: nextCnt is masked or deleted\n");
+            return 0;
+        }
+    }
+
+    cnt->nextInScaf = nextCnt;
+    return 1;
+}
+
+/*
+ * Set cnt->prevInScaf to flag.
+ *
+ * Clearing the flag always succeeds.  Setting it is refused when the
+ * connection is masked or deleted.
+ * Returns 1 on success, 0 (after printing a diagnostic) otherwise,
+ * including when cnt is NULL.
+ */
+static boolean setPrevInScaf(CONNECT *cnt, boolean flag)
+{
+    if (cnt == NULL)
+    {
+        printf("setPrevInScaf: empty pointer\n");
+        return 0;
+    }
+
+    if (flag && (cnt->mask || cnt->deleted))
+    {
+        printf("setPrevInScaf: cnt is masked or deleted\n");
+        return 0;
+    }
+
+    cnt->prevInScaf = flag;
+    return 1;
+}
+
+/*
+Replace the upstream connection of a binding.
+`origin` (from_c -> branch_c) is currently bound to its successor
+(branch_c -> to_c); afterwards the connection from_c_new -> branch_c is bound
+to that successor instead, and the twin strand is relinked accordingly.
+from_c
+ > branch_c - to_c
+from_c_new
+Does nothing when origin is NULL or has no nextInScaf, or when no valid twin
+connection (twin(to_c) -> twin(branch_c)) exists.
+*/
+static void substitueUSinScaf(CONNECT *origin, unsigned int from_c_new)
+{
+ if(!origin || !origin->nextInScaf)
+ return;
+
+ unsigned int branch_c, to_c;
+ unsigned int bal_branch_c, bal_to_c;
+ unsigned int bal_from_c_new = getTwinCtg(from_c_new);
+ CONNECT *bal_origin, *bal_nextCNT, *prevCNT, *bal_prevCNT;
+
+
+ branch_c = origin->contigID;
+ to_c = origin->nextInScaf->contigID;
+ bal_branch_c = getTwinCtg(branch_c);
+ bal_to_c = getTwinCtg(to_c);
+
+ // prevCNT: the replacement upstream connection (may be NULL if it is missing,
+ // masked or deleted; setNextInScaf() then only prints a diagnostic).
+ prevCNT = checkConnect(from_c_new, branch_c);
+ bal_nextCNT = checkConnect(bal_to_c, bal_branch_c);
+
+ if(!bal_nextCNT)
+ {
+ printf("substitueUSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c);
+ return;
+ }
+
+ // bal_origin: the connection the twin strand currently continues with.
+ bal_origin = bal_nextCNT->nextInScaf;
+ bal_prevCNT = checkConnect(bal_branch_c, bal_from_c_new);
+
+ // Rewire: the successor of origin moves to prevCNT; on the twin strand
+ // bal_nextCNT now continues with bal_prevCNT instead of bal_origin.
+ setPrevInScaf(bal_nextCNT->nextInScaf, 0);
+ setNextInScaf(prevCNT, origin->nextInScaf);
+ setNextInScaf(bal_nextCNT, bal_prevCNT);
+ setPrevInScaf(bal_prevCNT, 1);
+
+ // Detach the replaced connection and its twin from the binding.
+ setNextInScaf(origin, NULL);
+ setPrevInScaf(bal_origin, 0);
+}
+
+/*
+Replace the downstream connection of a binding.
+`origin` (branch_c -> to_c) is currently the bound successor of
+(from_c -> branch_c); afterwards branch_c -> to_c_new takes its place and
+the twin strand is relinked accordingly.
+                    to_c
+from_c - branch_c <
+                    to_c_new
+Does nothing when origin is NULL or has no prevInScaf, or when the twin of
+origin is missing or is not bound to a successor.
+*/
+static void substitueDSinScaf(CONNECT *origin, unsigned int branch_c, unsigned int to_c_new)
+{
+    if(!origin || !origin->prevInScaf)
+        return;
+
+    unsigned int to_c;
+    unsigned int bal_branch_c, bal_to_c, bal_to_c_new;
+    unsigned int from_c, bal_from_c;
+    CONNECT *bal_origin, *prevCNT, *bal_prevCNT;
+    CONNECT *nextCNT, *bal_nextCNT;
+
+    to_c = origin->contigID;
+    bal_branch_c = getTwinCtg(branch_c);
+    bal_to_c = getTwinCtg(to_c);
+    bal_origin = getCntBetween(bal_to_c, bal_branch_c);
+
+    if(!bal_origin)
+    {
+        printf("substitueDSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c);
+        return;
+    }
+
+    // from_c is recovered through the twin binding; without it there is
+    // nothing to relink (and nothing safe to dereference).
+    if(!bal_origin->nextInScaf)
+    {
+        printf("substitueDSinScaf: connect between %d and %d has no nextInScaf\n", bal_to_c, bal_branch_c);
+        return;
+    }
+
+    bal_from_c = bal_origin->nextInScaf->contigID;
+    from_c = getTwinCtg(bal_from_c);
+    bal_to_c_new = getTwinCtg(to_c_new);
+
+    // forward strand: from_c -> branch_c now continues with branch_c -> to_c_new
+    prevCNT = checkConnect(from_c, branch_c);
+    nextCNT = checkConnect(branch_c, to_c_new);
+    setNextInScaf(prevCNT, nextCNT);
+    setPrevInScaf(nextCNT, 1);
+
+    // twin strand: twin(to_c_new) -> twin(branch_c) now continues with
+    // twin(branch_c) -> twin(from_c)
+    bal_nextCNT = checkConnect(bal_to_c_new, bal_branch_c);
+    bal_prevCNT = checkConnect(bal_branch_c, bal_from_c);
+
+    setNextInScaf(bal_nextCNT, bal_prevCNT);
+
+    // detach the replaced connection and its twin from the binding
+    setPrevInScaf(origin, 0);
+    setNextInScaf(bal_origin, NULL);
+}
+
+/*
+ * Count the usable downward connections of `ctg`.
+ *
+ * When preCNT is given and already bound to a successor, the answer is 1
+ * without looking at the list.  Otherwise every connection of ctg that is
+ * neither deleted nor masked is counted.
+ */
+static int validConnect(unsigned int ctg, CONNECT *preCNT)
+{
+    CONNECT *cur;
+    int usable = 0;
+
+    if (preCNT != NULL && preCNT->nextInScaf)
+    {
+        return 1;
+    }
+
+    for (cur = contig_array[ctg].downwardConnect; cur != NULL; cur = cur->next)
+    {
+        if (cur->deleted || cur->mask)
+        {
+            continue;
+        }
+
+        usable++;
+    }
+
+    return usable;
+}
+
+/*
+ * Find the connection to follow when leaving contig `ctg`.
+ *
+ * ctg       - contig currently being left
+ * preCNT    - connection used to arrive at ctg (may be NULL)
+ * exception - set to 1 when preCNT is bound to a successor but the
+ *             connection ctg -> successor is masked or deleted; 0 otherwise
+ *
+ * If preCNT is bound to a successor and no exception occurs, that successor
+ * connection is returned.  Otherwise the only unmasked, undeleted downward
+ * connection of ctg is returned, provided the twin of ctg has at most one
+ * usable connection itself; in every other case NULL is returned.
+ */
+static CONNECT *getNextContig(unsigned int ctg, CONNECT *preCNT, boolean *exception)
+{
+    CONNECT *cn_temp, *retCNT = NULL;
+    int count = 0, valid_in;
+    unsigned int nextCtg, bal_ctg;
+
+    *exception = 0;
+
+    if(preCNT && preCNT->nextInScaf)
+    {
+        if(preCNT->contigID != ctg)
+            printf("pre cnt does not lead to %d\n", ctg);
+
+        nextCtg = preCNT->nextInScaf->contigID;
+        cn_temp = getCntBetween(ctg, nextCtg);
+
+        if(cn_temp && (cn_temp->mask || cn_temp->deleted))
+        {
+            printf("getNextContig: arc(%d %d) twin (%d %d) with mask %d deleted %d\n"
+                   , ctg, nextCtg, getTwinCtg(nextCtg), getTwinCtg(ctg)
+                   , cn_temp->mask, cn_temp->deleted);
+
+            if(!cn_temp->prevInScaf)
+                printf("not even has a prevInScaf\n");
+
+            cn_temp = getCntBetween(getTwinCtg(nextCtg),
+                                    getTwinCtg(ctg));
+
+            // the twin connection may be missing altogether; do not dereference NULL
+            if(!cn_temp || !cn_temp->nextInScaf)
+                printf("its twin cnt not has a nextInScaf\n");
+
+            fflush(stdout);
+            *exception = 1;
+        }
+        else
+            return preCNT->nextInScaf;
+    }
+
+    // no usable binding: fall back to a unique free connection
+    bal_ctg = getTwinCtg(ctg);
+    valid_in = validConnect(bal_ctg, NULL);
+
+    if(valid_in > 1)
+        return NULL;
+
+    if(!contig_array[ctg].downwardConnect)
+        return NULL;
+
+    cn_temp = contig_array[ctg].downwardConnect;
+
+    while(cn_temp)
+    {
+        if(cn_temp->mask || cn_temp->deleted)
+        {
+            cn_temp = cn_temp->next;
+            continue;
+        }
+
+        count++;
+
+        if(count == 1)
+            retCNT = cn_temp;
+        else if(count == 2)
+            return NULL;
+
+        cn_temp = cn_temp->next;
+    }
+
+    return retCNT;
+}
+
+// Return the connection from_c -> to_c if it exists and is neither masked
+// nor deleted; NULL otherwise.
+static CONNECT *checkConnect(unsigned int from_c, unsigned int to_c)
+{
+    CONNECT *found = getCntBetween(from_c, to_c);
+
+    if (found != NULL && !found->mask && !found->deleted)
+    {
+        return found;
+    }
+
+    return NULL;
+}
+
+/*
+ * Set the mask flag of the connection from_c -> to_c and of its twin
+ * (twin(to_c) -> twin(from_c)).
+ *
+ * Returns 0 when either of the two connections does not exist, 1 otherwise.
+ * When masking (mask != 0) the pair is additionally removed from any
+ * scaffold binding, on both strands and in both directions.
+ */
+static int setConnectMask(unsigned int from_c, unsigned int to_c, char mask)
+{
+ CONNECT *cn_temp, *cn_bal, *cn_ds, *cn_us;
+ unsigned int bal_fc = getTwinCtg(from_c);
+ unsigned int bal_tc = getTwinCtg(to_c);
+ unsigned int ctg3, bal_ctg3;
+
+ cn_temp = getCntBetween(from_c, to_c);
+ cn_bal = getCntBetween(bal_tc, bal_fc);
+
+ if(!cn_temp || !cn_bal)
+ {
+ return 0;
+ }
+
+ cn_temp->mask = mask;
+ cn_bal->mask = mask;
+
+ // unmasking needs no binding clean-up
+ if(!mask)
+ return 1;
+
+ // from_c -> to_c -> ctg3
+ // bal_fc <- bal_tc <- bal_ctg3
+ if(cn_temp->nextInScaf) //undo the binding
+ {
+ setPrevInScaf(cn_temp->nextInScaf, 0);
+ ctg3 = cn_temp->nextInScaf->contigID;
+ setNextInScaf(cn_temp, NULL);
+ bal_ctg3 = getTwinCtg(ctg3);
+ // cn_ds may be NULL; setNextInScaf() then only prints a diagnostic
+ cn_ds = getCntBetween(bal_ctg3, bal_tc);
+ setNextInScaf(cn_ds, NULL);
+ setPrevInScaf(cn_bal, 0);
+ }
+
+ // ctg3 -> from_c -> to_c
+ // bal_ctg3 <- bal_fc <- bal_tc
+ if(cn_bal->nextInScaf)
+ {
+ setPrevInScaf(cn_bal->nextInScaf, 0);
+ bal_ctg3 = cn_bal->nextInScaf->contigID;
+ setNextInScaf(cn_bal, NULL);
+ ctg3 = getTwinCtg(bal_ctg3);
+ // cn_us may be NULL; setNextInScaf() then only prints a diagnostic
+ cn_us = getCntBetween(ctg3, from_c);
+ setNextInScaf(cn_us, NULL);
+ setPrevInScaf(cn_temp, 0);
+ }
+
+ return 1;
+}
+
+
+/*
+ * Set the `used` flag on the connection from_c -> to_c and on its twin
+ * (twin(to_c) -> twin(from_c)).
+ * Returns 1 when both connections exist, 0 (changing nothing) otherwise.
+ */
+static boolean setConnectUsed(unsigned int from_c, unsigned int to_c, char flag)
+{
+    CONNECT *forward = getCntBetween(from_c, to_c);
+    CONNECT *reverse = getCntBetween(getTwinCtg(to_c), getTwinCtg(from_c));
+
+    if (forward == NULL || reverse == NULL)
+    {
+        return 0;
+    }
+
+    forward->used = flag;
+    reverse->used = flag;
+    return 1;
+}
+
+/*
+ * Set the `weakPoint` flag on the connection from_c -> to_c and on its twin
+ * (twin(to_c) -> twin(from_c)).
+ * Returns 1 when both connections exist, 0 (changing nothing) otherwise.
+ */
+static int setConnectWP(unsigned int from_c, unsigned int to_c, char flag)
+{
+    CONNECT *forward = getCntBetween(from_c, to_c);
+    CONNECT *reverse = getCntBetween(getTwinCtg(to_c), getTwinCtg(from_c));
+
+    if (forward == NULL || reverse == NULL)
+    {
+        return 0;
+    }
+
+    forward->weakPoint = flag;
+    reverse->weakPoint = flag;
+    return 1;
+}
+
+/*
+ * Set the `deleted` flag on the connection from_c -> to_c and on its twin
+ * (twin(to_c) -> twin(from_c)).
+ *
+ * When the flag is being set (non-zero) and cleanBinding is true, the
+ * prevInScaf / nextInScaf binding fields of both connections are cleared
+ * as well.
+ * Returns 1 when both connections exist, 0 (changing nothing) otherwise.
+ */
+static int setConnectDelete(unsigned int from_c, unsigned int to_c, char flag, boolean cleanBinding)
+{
+    CONNECT *forward = getCntBetween(from_c, to_c);
+    CONNECT *reverse = getCntBetween(getTwinCtg(to_c), getTwinCtg(from_c));
+
+    if (forward == NULL || reverse == NULL)
+    {
+        return 0;
+    }
+
+    forward->deleted = flag;
+    reverse->deleted = flag;
+
+    if (flag && cleanBinding)
+    {
+        forward->prevInScaf = 0;
+        forward->nextInScaf = NULL;
+        reverse->prevInScaf = 0;
+        reverse->nextInScaf = NULL;
+    }
+
+    return 1;
+}
+
+ // Apply 'flag' as the mask of contig 'ctg' and of its twin, and pass the same
+ // value to setConnectMask() for every outgoing connection of either contig
+ // that is not already masked and is not bound into a scaffold
+ // (prevInScaf / nextInScaf / singleInScaf all clear).
+ static void maskContig(unsigned int ctg, boolean flag)
+ {
+     unsigned int twin = getTwinCtg(ctg);
+     CONNECT *edge;
+
+     // ctg -> ctg2
+     for(edge = contig_array[ctg].downwardConnect; edge; edge = edge->next)
+     {
+         if(!(edge->mask || edge->prevInScaf || edge->nextInScaf || edge->singleInScaf))
+             setConnectMask(ctg, edge->contigID, flag);
+     }
+
+     // bal_ctg2 <- bal_ctg
+     for(edge = contig_array[twin].downwardConnect; edge; edge = edge->next)
+     {
+         if(!(edge->mask || edge->prevInScaf || edge->nextInScaf || edge->singleInScaf))
+             setConnectMask(twin, edge->contigID, flag);
+     }
+
+     contig_array[ctg].mask = flag;
+     contig_array[twin].mask = flag;
+ }
+
+ // Mask "puzzle" contigs: unmasked contigs with more than one valid connection
+ // on either side whose total number of valid connections reaches num_connect.
+ // When contigLen is non-zero the scan stops at the first contig longer than
+ // contigLen. Returns the number of contigs masked.
+ static int maskPuzzle(int num_connect, unsigned int contigLen)
+ {
+     int incoming, outgoing;
+     int masked = 0;
+     int puzzleCounter = 0; // puzzles still left after masking (debug aid only)
+     unsigned int idx = 1;
+     unsigned int twin;
+
+     while(idx <= num_ctg)
+     {
+         if(contigLen && contig_array[idx].length > contigLen)
+             break;
+
+         if(!contig_array[idx].mask)
+         {
+             twin = getTwinCtg(idx);
+             incoming = validConnect(twin, NULL);
+             outgoing = validConnect(idx, NULL);
+
+             if((incoming > 1 || outgoing > 1) && (incoming + outgoing >= num_connect))
+             {
+                 masked++;
+                 maskContig(idx, 1);
+             }
+
+             // re-count after the possible masking
+             incoming = validConnect(twin, NULL);
+             outgoing = validConnect(idx, NULL);
+
+             if(incoming > 1 || outgoing > 1)
+                 puzzleCounter++;
+
+             // the twin directly follows this contig, skip it
+             if(isSmallerThanTwin(idx))
+                 idx++;
+         }
+
+         idx++;
+     }
+
+     //printf("Masked %d contigs, %d puzzle left\n",masked,puzzleCounter);
+     return masked;
+ }
+
+ // Re-evaluate every connection against the weight threshold cut_off:
+ //  - a connection previously deleted as weak whose weight now reaches cut_off
+ //    is restored (deleted and weak cleared);
+ //  - a live connection with 0 < weight < cut_off that has no nextInScaf /
+ //    prevInScaf binding is marked deleted and weak, and its singleInScaf
+ //    flag is cleared.
+ // Reports the number of unmasked connections newly marked weak on stderr and
+ // then calls checkCircle().
+ static void deleteWeakCnt(int cut_off)
+ {
+     unsigned int ctg;
+     CONNECT *edge;
+     int weaks = 0;
+
+     for(ctg = 1; ctg <= num_ctg; ctg++)
+     {
+         for(edge = contig_array[ctg].downwardConnect; edge; edge = edge->next)
+         {
+             if(edge->weak && edge->deleted && edge->weight >= cut_off)
+             {
+                 edge->deleted = 0;
+                 edge->weak = 0;
+                 continue;
+             }
+
+             if(edge->deleted || edge->nextInScaf || edge->prevInScaf)
+                 continue;
+
+             if(edge->weight > 0 && edge->weight < cut_off)
+             {
+                 edge->deleted = 1;
+                 edge->weak = 1;
+                 edge->singleInScaf = 0;
+
+                 if(!edge->mask)
+                     weaks++;
+             }
+         }
+     }
+
+     fprintf(stderr, "[%s]%d connects doesn't meet weight threshold .\n", __FUNCTION__, weaks);
+     checkCircle();
+ }
+
+ //check if one contig is linearly connected to the other ->C1->C2...
+ // Parameters:
+ //   starter          - contig the search started from (removeTransitive passes the contig whose
+ //                      two connections are compared); the walk gives up if it returns to it or its twin
+ //   cnt2c1           - connection leading to c1 (c1 = cnt2c1->contigID)
+ //   c2               - contig the walk from c1 tries to reach
+ //   min_dis, max_dis - allowed range for the distance between c1 and c2
+ // Returns 1 when c1 reaches c2 directly, or when the end of c1's chain and the most upstream
+ // contig of c2 are (or get) connected; 0 when the linear relation cannot be established;
+ // -1 when c1 and c2 are the same contig.
+ // Side effects: fills downstreamCTG[1..] and upstreamCTG[0..] and updates dsCtgCounter /
+ // usCtgCounter; may unmask/undelete an existing connection or create a new weight-0
+ // connection (plus its twin-strand counterpart).
+ static int linearC2C(unsigned int starter, CONNECT *cnt2c1, unsigned int c2, int min_dis, int max_dis)
+ {
+ int out_num, in_num;
+ CONNECT *prevCNT, *cnt, *cn_temp;
+ unsigned int c1, bal_c1, ctg, bal_c2;
+ int len = 0;
+ unsigned int bal_start = getTwinCtg(starter);
+ boolean excep;
+
+ c1 = cnt2c1->contigID;
+
+ if(c1 == c2)
+ {
+ printf("linearC2C: c1(%d) and c2(%d) are the same contig\n", c1, c2);
+ return -1;
+ }
+
+ // give up when c1 has more than one valid connection on its twin (incoming) side
+ bal_c1 = getTwinCtg(c1);
+ in_num = validConnect(bal_c1, NULL);
+
+ if(in_num > 1)
+ return 0;
+
+ // slot 0 of downstreamCTG is left free here; removeTransitive stores the starter there afterwards
+ dsCtgCounter = 1;
+ usCtgCounter = 0;
+ downstreamCTG[dsCtgCounter++] = c1;
+ bal_c2 = getTwinCtg(c2);
+ upstreamCTG[usCtgCounter++] = bal_c2;
+ // check if c1 is linearly connected to c2 by pe connections
+ cnt = prevCNT = cnt2c1;
+
+ while((cnt = getNextContig(c1, prevCNT, &excep)) != NULL)
+ {
+ c1 = cnt->contigID;
+ len += cnt->gapLen + contig_array[c1].length;
+
+ if(c1 == c2)
+ return 1;
+
+ // too far away, or the walk looped back to the starting contig
+ if(len > max_dis || c1 == starter || c1 == bal_start)
+ return 0;
+
+ downstreamCTG[dsCtgCounter++] = c1;
+
+ if(dsCtgCounter >= MAXCinBetween)
+ {
+ printf("%d downstream contigs, start at %d, max_dis %d, current dis %d\n"
+ , dsCtgCounter, starter, max_dis, len);
+ return 0;
+ }
+
+ prevCNT = cnt;
+ }
+
+ // the downstream walk stopped at c1; it must have no valid outgoing connection left
+ out_num = validConnect(c1, NULL);
+
+ if(out_num)
+ return 0;
+
+
+ //find the most upstream contig to c2
+ // (walks downstream from the twin of c2; 'len' keeps accumulating from the first walk)
+ cnt = prevCNT = NULL;
+ ctg = bal_c2;
+
+ while((cnt = getNextContig(ctg, prevCNT, &excep)) != NULL)
+ {
+ ctg = cnt->contigID;
+ len += cnt->gapLen + contig_array[ctg].length;
+
+ if(len > max_dis || ctg == starter || ctg == bal_start)
+ return 0;
+
+ prevCNT = cnt;
+ upstreamCTG[usCtgCounter++] = ctg;
+
+ if(usCtgCounter >= MAXCinBetween)
+ {
+ printf("%d upstream contigs, start at %d, max_dis %d, current dis %d\n"
+ , usCtgCounter, starter, max_dis, len);
+ return 0;
+ }
+ }
+
+ if(dsCtgCounter + usCtgCounter > MAXCinBetween)
+ {
+ printf("%d downstream and %d upstream contigs\n", dsCtgCounter, usCtgCounter);
+ return 0;
+ }
+
+ out_num = validConnect(ctg, NULL);
+
+ if(out_num)
+ {
+ return 0;
+ }
+
+ // from here on c2 is the most upstream contig of the original c2's chain, and the
+ // distance window is reduced by the length already covered by both walks
+ c2 = getTwinCtg(ctg);
+ min_dis -= len;
+ max_dis -= len;
+
+ if(c1 == c2 || c1 == ctg || max_dis < 0)
+ return 0;
+
+ cn_temp = getCntBetween(c1, c2);
+
+ // an existing connection between the two chain ends is simply re-activated
+ if(cn_temp)
+ {
+ setConnectMask(c1, c2, 0);
+ setConnectDelete(c1, c2, 0, 0);
+ return 1;
+ }
+
+ // otherwise create one with the midpoint of the remaining window as gap (never negative)
+ len = (min_dis + max_dis) / 2 >= 0 ? (min_dis + max_dis) / 2 : 0;
+ cn_temp = allocateCN(c2, len);
+
+ if(cntLookupTable)
+ putCnt2LookupTable(c1, cn_temp);
+
+ cn_temp->weight = 0; // special connect from the original graph
+ cn_temp->next = contig_array[c1].downwardConnect;
+ contig_array[c1].downwardConnect = cn_temp;
+
+ // and the matching connection between the twins: twin(c2) -> twin(c1)
+ bal_c1 = getTwinCtg(c1);
+ bal_c2 = getTwinCtg(c2);
+
+ cn_temp = allocateCN(bal_c1, len);
+
+ if(cntLookupTable)
+ putCnt2LookupTable(bal_c2, cn_temp);
+
+ cn_temp->weight = 0; // special connect from the original graph
+ cn_temp->next = contig_array[bal_c2].downwardConnect;
+ contig_array[bal_c2].downwardConnect = cn_temp;
+ return 1;
+ }
+ // Build solidArray from the two contig lists collected by linearC2C():
+ // first downstreamCTG[0 .. dsCtgCounter-1] in order, then the twins of
+ // upstreamCTG[] in reverse order. dsCtgCounter is advanced past the appended
+ // entries and solidCounter receives the total number of contigs stored.
+ static void catUsDsContig()
+ {
+     int k;
+     unsigned int twin;
+
+     for(k = 0; k < dsCtgCounter; k++)
+         *(unsigned int *)darrayPut(solidArray, k) = downstreamCTG[k];
+
+     k = usCtgCounter;
+
+     while(k-- > 0)
+     {
+         twin = getTwinCtg(upstreamCTG[k]);
+         *(unsigned int *)darrayPut(solidArray, dsCtgCounter) = twin;
+         dsCtgCounter++;
+     }
+
+     solidCounter = dsCtgCounter;
+ }
+
+ // Bind the connections between consecutive contigs of solidArray into one
+ // scaffold path, first along the array and then along the reverse
+ // complementary path (twins, walked from the last entry back to the first).
+ // Each connection gets singleInScaf = 1 only when the path holds exactly two
+ // contigs; every connection after the first one of a path is chained to its
+ // predecessor with setNextInScaf()/setPrevInScaf().
+ // If a connection is missing a message is printed and the function returns,
+ // leaving the bindings made so far in place.
+ static void consolidate()
+ {
+     int pos, k;
+     CONNECT *link;
+     CONNECT *prevLink;
+     unsigned int src, dst;
+
+     // forward path
+     prevLink = NULL;
+     src = *(unsigned int *)darrayGet(solidArray, 0);
+
+     for(pos = 1; pos < solidCounter; pos++)
+     {
+         dst = *(unsigned int *)darrayGet(solidArray, pos);
+         link = checkConnect(src, dst);
+
+         if(!link)
+         {
+             printf("consolidate A: no connect from %d to %d\n",
+                    src, dst);
+
+             for(k = 0; k < solidCounter; k++)
+                 printf("%d-->", *(unsigned int *)darrayGet(solidArray, k));
+
+             printf("\n");
+             return;
+         }
+
+         if(solidCounter == 2)
+             link->singleInScaf = 1;
+         else
+             link->singleInScaf = 0;
+
+         if(prevLink)
+         {
+             setNextInScaf(prevLink, link);
+             setPrevInScaf(link, 1);
+         }
+
+         prevLink = link;
+         src = dst;
+     }
+
+     //the reverse complementary path
+     prevLink = NULL;
+     src = getTwinCtg(*(unsigned int *)darrayGet(solidArray, solidCounter - 1));
+
+     for(pos = solidCounter - 2; pos >= 0; pos--)
+     {
+         dst = getTwinCtg(*(unsigned int *)darrayGet(solidArray, pos));
+         link = checkConnect(src, dst);
+
+         if(!link)
+         {
+             printf("consolidate B: no connect from %d to %d\n", src, dst);
+             return;
+         }
+
+         if(solidCounter == 2)
+             link->singleInScaf = 1;
+         else
+             link->singleInScaf = 0;
+
+         if(prevLink)
+         {
+             setNextInScaf(prevLink, link);
+             setPrevInScaf(link, 1);
+         }
+
+         prevLink = link;
+         src = dst;
+     }
+ }
+
+ // Debug helper: print the state of the connection ctg1 -> ctg2 (mask,
+ // deleted, weight, singleInScaf and its scaffold bindings) to stdout, or
+ // "ctg1 -X- ctg2" when no such connection exists.
+ static void debugging1(unsigned int ctg1, unsigned int ctg2)
+ {
+     CONNECT *link = getCntBetween(ctg1, ctg2);
+
+     if(!link)
+     {
+         printf("%d -X- %d\n", ctg1, ctg2);
+         return;
+     }
+
+     printf("(%d,%d) mask %d deleted %d w %d,singleInScaf %d\n",
+            ctg1, ctg2, link->mask, link->deleted, link->weight, link->singleInScaf);
+
+     if(link->nextInScaf)
+         printf("%d->%d->%d\n", ctg1, ctg2, link->nextInScaf->contigID);
+
+     if(link->prevInScaf)
+     {
+         printf("*->%d->%d\n", ctg1, ctg2);
+         return;
+     }
+
+     if(!link->nextInScaf)
+         printf("NULL->%d->%d->NULL\n", ctg1, ctg2);
+ }
+ //remove transitive connections which cross linear paths (these paths may be broken)
+ //if a->b->c and a->c, mask a->c
+ // Repeats full passes over all contigs until a pass makes no change. For each unmasked
+ // contig with exactly two valid outgoing connections, the nearer one (cn1) and the farther
+ // one (cn2) are picked; if linearC2C() confirms that cn1's target leads linearly to cn2's
+ // target, the chain is bound with consolidate() and cn2 is left marked deleted.
+ static void removeTransitive()
+ {
+ unsigned int i, bal_ctg;
+ int flag = 1, out_num, in_num, count, min, max, linear;
+ CONNECT *cn_temp, *cn1 = NULL, *cn2 = NULL;
+
+ while(flag)
+ {
+ flag = 0;
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].mask)
+ continue;
+
+ out_num = validConnect(i, NULL);
+
+ if(out_num != 2)
+ continue;
+
+ // collect the first two connections that are neither deleted nor masked
+ cn_temp = contig_array[i].downwardConnect;
+ count = 0;
+
+ while(cn_temp)
+ {
+ if(cn_temp->deleted || cn_temp->mask)
+ {
+ cn_temp = cn_temp->next;
+ continue;
+ }
+
+ count++;
+
+ if(count == 1)
+ cn1 = cn_temp;
+ else if(count == 2)
+ {
+ cn2 = cn_temp;
+ }
+ else // count > 2
+ break;
+
+ cn_temp = cn_temp->next;
+ }
+
+ if(count > 2)
+ {
+ printf("%d valid connections from ctg %d\n", count, i);
+ continue;
+ }
+
+ // NOTE(review): count < 2 is not handled here; cn1/cn2 would then be NULL or left over
+ // from an earlier contig. Presumably validConnect() == 2 guarantees count == 2 - confirm.
+ if(cn1->gapLen > cn2->gapLen)
+ {
+ cn_temp = cn1;
+ cn1 = cn2;
+ cn2 = cn_temp;
+ } //make sure cn1 is closer to contig i than cn2
+
+ if(cn1->prevInScaf && cn2->prevInScaf)
+ continue;
+
+ bal_ctg = getTwinCtg(cn2->contigID);
+ in_num = validConnect(bal_ctg, NULL);
+
+ if(in_num > 2)
+ continue;
+
+ // expected distance window between cn1's target and cn2's target, +/- half of ins_size_var
+ min = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length - ins_size_var / 2;
+ max = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length + ins_size_var / 2;
+
+ if(max < 0)
+ continue;
+
+ //temprarily delete cn2
+ setConnectDelete(i, cn2->contigID, 1, 0);
+ linear = linearC2C(i, cn1, cn2->contigID, min, max);
+
+ if(linear != 1)
+ {
+ // not linear: restore cn2
+ setConnectDelete(i, cn2->contigID, 0, 0);
+ continue;
+ }
+ else
+ {
+ // linearC2C left slot 0 of downstreamCTG free for the starting contig
+ downstreamCTG[0] = i;
+ catUsDsContig();
+
+ // NOTE(review): on this path cn2 stays marked deleted although nothing is
+ // consolidated - confirm this is intended.
+ if(!checkSimple(solidArray, solidCounter))
+ continue;
+
+ // cn1 is reused for the connection between the last two contigs of the chain
+ cn1 = getCntBetween(*(unsigned int *)darrayGet(solidArray, solidCounter - 2),
+ *(unsigned int *)darrayGet(solidArray, solidCounter - 1));
+
+ if(cn1 && cn1->nextInScaf && cn2->nextInScaf)
+ {
+ setConnectDelete(i, cn2->contigID, 0, 0);
+ continue;
+ }
+
+ consolidate();
+
+ // hand cn2's scaffold bindings over to the connections of the linear chain
+ if(cn2->prevInScaf)
+ substitueDSinScaf(cn2, *(unsigned int *)darrayGet(solidArray, 0),
+ *(unsigned int *)darrayGet(solidArray, 1));
+
+ if(cn2->nextInScaf)
+ substitueUSinScaf(cn2, *(unsigned int *)darrayGet(solidArray, solidCounter - 2));
+
+ flag++;
+ }
+ } //for each contig
+
+ //printf("a remove transitive lag, %d connections removed\n",flag);
+ }
+
+ }
+
+//get repeat contigs back into the scaffold according to connected unique contigs on both sides
+/*
+ A ------ D
+ > [i] <
+ B E
+*/
+ // Debug helper: dump every outgoing connection of contig 'ctg' to stderr,
+ // one line per connection, prefixed by a note when the connection carries a
+ // nextInScaf and/or prevInScaf binding.
+ static void debugging2(unsigned int ctg)
+ {
+     CONNECT *edge;
+
+     for(edge = contig_array[ctg].downwardConnect; edge; edge = edge->next)
+     {
+         if(edge->nextInScaf)
+             fprintf(stderr, "with nextInScaf,");
+
+         if(edge->prevInScaf)
+             fprintf(stderr, "with prevInScaf,");
+
+         fprintf(stderr, "%u >> %d, mask %d deleted %d, inherit %d, singleInScaf %d\n",
+                 ctg, edge->contigID, edge->mask, edge->deleted, edge->inherit, edge->singleInScaf);
+     }
+ }
+ // Debugging hook called from simplifyCnt(). It currently does nothing: the sample
+ // debugging1()/debugging2() calls below are commented out and serve as a template.
+ static void debugging()
+ {
+ /*
+ debugging1(1777,1468);
+ debugging2(8065);
+ debugging2(8066);
+ */
+ }
+
+ // Simplify the connection graph: first remove transitive connections, then run
+ // general_linearization(1). The debugging() hook is invoked after each step.
+ static void simplifyCnt()
+ {
+ removeTransitive();
+ debugging();
+ general_linearization(1);
+ debugging();
+ }
+
+ // Linear search for 'node' among the first nodeCounter entries of nodesInSub.
+ // Returns the position of the first match, or -1 when the node is not there.
+ static int getIndexInArray(unsigned int node)
+ {
+     int pos = 0;
+
+     while(pos < nodeCounter)
+     {
+         if(nodesInSub[pos] == node)
+             return pos;
+
+         pos++;
+     }
+
+     return -1;
+ }
+
+ // Register 'node' in the current subgraph: push it onto the heap with the
+ // given distance and record it in nodesInSub[index] / nodeDistance[index].
+ // Returns 0 when the node is already recorded at a position > 0,
+ // -1 when 'index' is beyond MaxNodeInSub, and 1 when the node was added.
+ // NOTE(review): a node found at position 0 is NOT treated as already present
+ // (the test is "> 0", not ">= 0") - confirm whether that is intentional.
+ // NOTE(review): the -1 result only survives if 'boolean' is a signed type -
+ // confirm against its typedef.
+ static boolean putNodeIntoSubgraph(FibHeap *heap, int distance, unsigned int node, int index)
+ {
+     if(getIndexInArray(node) > 0)
+     {
+         //printf("exists\n");
+         return 0;
+     }
+
+     if(index >= MaxNodeInSub)
+         return -1;
+
+     insertNodeIntoHeap(heap, distance, node);
+     nodesInSub[index] = node;
+     nodeDistance[index] = distance;
+     return 1;
+ }
+
+ // Follow the chain of contigs that starts after connection prevC (which leads
+ // to 'node') using getNextContig(), and register every contig met with
+ // putNodeIntoSubgraph().
+ //   heap     - heap receiving the nodes
+ //   distance - distance of 'node' from the subgraph origin; extended by the
+ //              gap and the contig length at every step of the walk
+ //   node     - contig the walk starts from
+ //   index    - in: next free slot in the subgraph arrays; out: next free slot
+ //              after the walk (only updated on success)
+ //   prevC    - connection through which 'node' was reached
+ // Returns 1 when the end of the chain is reached (or getNextContig reports an
+ // exception), 0 when putNodeIntoSubgraph() reports that the subgraph is full.
+ //
+ // Fix: the running distance used to be increased by the contig ID
+ // (gapLen + ctg); it now adds the contig length, as checkUnique() does for
+ // the first hop.
+ static boolean putChainIntoSubgraph(FibHeap *heap, int distance, unsigned int node, int *index, CONNECT *prevC)
+ {
+     unsigned int ctg = node;
+     CONNECT *nextCnt;
+     boolean excep, flag;
+     int counter = *index;
+
+     while(1)
+     {
+         nextCnt = getNextContig(ctg, prevC, &excep);
+
+         if(excep || !nextCnt)
+         {
+             *index = counter;
+             return 1;
+         }
+
+         ctg = nextCnt->contigID;
+         distance += nextCnt->gapLen + contig_array[ctg].length;
+         flag = putNodeIntoSubgraph(heap, distance, ctg, counter);
+
+         // NOTE(review): only effective if 'boolean' is a signed type
+         if(flag < 0)
+             return 0;
+
+         if(flag > 0)
+             counter++;
+
+         prevC = nextCnt;
+     }
+ }
+ //check if nodes in subgraph have a potential heter form
+ // Scans ctg4heapArray[1..nodeCounter]. Whenever two consecutive steps both
+ // show a non-positive gap, the extent of the later contig past the remembered
+ // overlap point is added to a running sum. Returns 0 as soon as that sum,
+ // relative to the total contig length, exceeds 'tolerance'; returns 2
+ // otherwise (also when the total length is below 1).
+ static boolean check_het_overlap(double tolerance)
+ {
+     int k, gap;
+     int total_len = 0;
+     int triple_sum = 0;
+     int overlap_point = 0; // only read while in_overlap is set
+     boolean in_overlap = 0;
+
+     for(k = 1; k <= nodeCounter; k++)
+         total_len += contig_array[ctg4heapArray[k].ctgID].length;
+
+     if(total_len < 1)
+         return 2;
+
+     for(k = 1; k < nodeCounter; k++)
+     {
+         gap = ctg4heapArray[k + 1].dis - ctg4heapArray[k].dis
+               - contig_array[ctg4heapArray[k + 1].ctgID].length;
+
+         if(gap > 0)
+         {
+             in_overlap = 0;
+             continue;
+         }
+
+         if(in_overlap)
+         {
+             triple_sum += ctg4heapArray[k + 1].dis - overlap_point
+                           - contig_array[ctg4heapArray[k + 1].ctgID].length;
+
+             if((double)triple_sum / total_len > tolerance)
+                 return 0;
+         }
+
+         in_overlap = 1;
+         overlap_point = ctg4heapArray[k].dis;
+     }
+
+     return 2;
+ }
+
+ // check if a contig is unique by trying to line its downstream/upstream nodes together
+ //   node      - contig whose outgoing connections are examined
+ //   tolerance - maximal overlap fraction accepted by checkOverlapInBetween()
+ // Builds a subgraph from 'node', the target of each of its non-deleted,
+ // non-masked connections and the chain behind each target, then pops the
+ // nodes from the heap into nodesInSubInOrder.
+ // Returns 0 when the subgraph overflows or the overlap checks fail,
+ // 1 when the nodes can be lined up (or the subgraph holds at most two nodes),
+ // and otherwise whatever check_het_overlap() returns (2 for a potential
+ // heterozygous form).
+ //
+ // Fix: nodeCounter is reset to 0 before the first node is inserted.
+ // putNodeIntoSubgraph() looks the node up among the first nodeCounter
+ // entries of nodesInSub; with the counter left over from a previous call the
+ // start node could be found among stale entries and silently not inserted.
+ static boolean checkUnique(unsigned int node, double tolerance)
+ {
+     CONNECT *ite_cnt;
+     unsigned int currNode;
+     int distance;
+     int popCounter = 0;
+     boolean flag;
+
+     currNode = node;
+     FibHeap *heap = newFibHeap();
+
+     nodeCounter = 0; // start from an empty subgraph so stale entries are ignored
+     putNodeIntoSubgraph(heap, 0, currNode, 0);
+     nodeCounter = 1;
+     ite_cnt = contig_array[currNode].downwardConnect;
+
+     while(ite_cnt)
+     {
+         if(ite_cnt->deleted || ite_cnt->mask)
+         {
+             ite_cnt = ite_cnt->next;
+             continue;
+         }
+
+         currNode = ite_cnt->contigID;
+         distance = ite_cnt->gapLen + contig_array[currNode].length;
+         flag = putNodeIntoSubgraph(heap, distance, currNode, nodeCounter);
+
+         // NOTE(review): only effective if 'boolean' is a signed type
+         if(flag < 0)
+         {
+             destroyHeap(heap);
+             return 0;
+         }
+
+         if(flag > 0)
+             nodeCounter++;
+
+         // add the contigs chained behind this neighbour
+         flag = putChainIntoSubgraph(heap, distance, currNode, &nodeCounter, ite_cnt);
+
+         if(!flag)
+         {
+             destroyHeap(heap);
+             return 0;
+         }
+
+         ite_cnt = ite_cnt->next;
+     }
+
+     if(nodeCounter <= 2) // no more than 2 valid connections
+     {
+         destroyHeap(heap);
+         return 1;
+     }
+
+     // drain the heap into nodesInSubInOrder
+     while((currNode = removeNextNodeFromHeap(heap)) != 0)
+         nodesInSubInOrder[popCounter++] = currNode;
+
+     destroyHeap(heap);
+
+     flag = checkOverlapInBetween(tolerance);
+
+     if(flag == 1)
+     {
+         return 1;
+     }
+     else
+     {
+         flag = check_het_overlap(0.02);//check the heter form
+     }
+
+     return flag;
+ }
+
+ //find longest path and break the other
+ // Chooses one outgoing connection of 'ctg' to keep and masks all the others with
+ // setConnectMask(ctg, ..., 1). The kept connection is the first one whose direct target is
+ // ctg4heapArray[nodeCounter].ctgID; if none is, it is the one with the largest accumulated
+ // 'dis' (the first connection when no 'dis' is positive).
+ static void process_ds_contig(unsigned int ctg)
+ {
+ unsigned int target = ctg4heapArray[nodeCounter].ctgID;
+ //int boarder = ctg4heapArray[nodeCounter].dis;
+ boolean excep;
+ CONNECT *route = contig_array[ctg].downwardConnect;
+ CONNECT *max_route = route;
+
+ int max_dis = 0;
+
+ boolean end_flag = 0;
+
+ while(route)
+ {
+
+ int dis = 0;
+ CONNECT *tmp_cnt = route;
+
+ // NOTE(review): every iteration of this loop reads route->gapLen / route->contigID, not
+ // tmp_cnt's, so 'dis' grows by the same amount per step and only route's direct target is
+ // compared with 'target'. Presumably tmp_cnt was meant - confirm before changing.
+ while(tmp_cnt)
+ {
+ dis += route->gapLen + contig_array[route->contigID].length;
+
+ if(route->contigID == target)
+ {
+ end_flag = 1;
+ break;
+ }
+
+ tmp_cnt = getNextContig(route->contigID, tmp_cnt, &excep);
+ }
+
+ if(dis > max_dis)
+ {
+ max_dis = dis;
+ max_route = route;
+ }
+
+ // a route that hits the target wins immediately
+ if(end_flag)
+ {
+ max_route = route;
+ break;
+ }
+
+ route = route->next;
+ }
+
+ //delete connect except max_route
+ route = contig_array[ctg].downwardConnect;
+
+ while(route)
+ {
+ if(route != max_route)
+ {
+ setConnectMask(ctg, route->contigID, 1);
+ }
+
+ route = route->next;
+ }
+
+ }
+ // Counterpart of process_ds_contig() for the other strand (maskRepeat calls it with the
+ // twin contig). Keeps one outgoing connection of 'ctg' and masks all the others with
+ // setConnectMask(ctg, ..., 1). The kept connection is the first one whose direct target is
+ // ctg4heapArray[1].ctgID; if none is, it is the one with the smallest (most negative)
+ // accumulated 'dis'.
+ static void process_us_contig(unsigned int ctg)
+ {
+ unsigned int target = ctg4heapArray[1].ctgID;
+ //int boarder = ctg4heapArray[1].dis;
+ boolean excep;
+ CONNECT *route = contig_array[ctg].downwardConnect;
+ CONNECT *min_route = route;
+
+ int min_dis = 0;
+
+ boolean end_flag = 0;
+
+ while(route)
+ {
+
+ int dis = 0;
+ CONNECT *tmp_cnt = route;
+
+ // NOTE(review): every iteration of this loop reads route->gapLen / route->contigID, not
+ // tmp_cnt's, so 'dis' shrinks by the same amount per step and only route's direct target is
+ // compared with 'target'. Presumably tmp_cnt was meant - confirm before changing.
+ while(tmp_cnt)
+ {
+ dis -= route->gapLen + contig_array[route->contigID].length;
+
+ if(route->contigID == target)
+ {
+ end_flag = 1;
+ break;
+ }
+
+ tmp_cnt = getNextContig(route->contigID, tmp_cnt, &excep);
+ }
+
+ if(dis < min_dis)
+ {
+ min_dis = dis;
+ min_route = route;
+ }
+
+ // a route that hits the target wins immediately
+ if(end_flag)
+ {
+ min_route = route;
+ break;
+ }
+
+ route = route->next;
+ }
+
+ //delete connect except min_route
+ route = contig_array[ctg].downwardConnect;
+
+ while(route)
+ {
+ if(route != min_route)
+ {
+ setConnectMask(ctg, route->contigID, 1);
+ }
+
+ route = route->next;
+ }
+
+ }
+
+ //mask contigs with downstream and/or upstream can not be lined
+ // For every unmasked contig with more than one valid connection on either side (a "puzzle"):
+ //  - it is masked outright when its coverage exceeds twice cvgAvg;
+ //  - otherwise checkUnique() is run on each side that has more than one connection; the
+ //    contig is masked when either side returns 0, and a side returning 2 is resolved by
+ //    process_us_contig() / process_ds_contig() instead.
+ // Prints the number of masked contigs, puzzles and heterozygous cases to stdout.
+ static void maskRepeat()
+ {
+ int in_num, out_num, flagA, flagB;
+ int counter = 0;
+ int puzzleCounter = 0;
+ unsigned int i, bal_i;
+ int het_counter = 0;
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].mask)
+ continue;
+
+ bal_i = getTwinCtg(i);
+ in_num = validConnect(bal_i, NULL);
+ out_num = validConnect(i, NULL);
+
+ if(in_num > 1 || out_num > 1)
+ puzzleCounter++;
+ else
+ {
+ // not a puzzle; the extra i++ skips the twin, which directly follows this contig
+ if(isSmallerThanTwin(i))
+ i++;
+
+ continue;
+
+ }
+
+ // high-coverage contigs are masked without further checks
+ if(contig_array[i].cvg > 2 * cvgAvg)
+ {
+ counter++;
+ maskContig(i, 1);
+
+ //printf("thick mask contig %d and %d\n",i,bal_i);
+ if(isSmallerThanTwin(i))
+ i++;
+
+ continue;
+ }
+
+ // flagA: result for the twin (upstream) side, flagB: result for the downstream side;
+ // a side with at most one connection counts as unique (1)
+ if(in_num > 1)
+ flagA = checkUnique(bal_i, OverlapPercent);
+ else
+ flagA = 1;
+
+ if(out_num > 1)
+ flagB = checkUnique(i, OverlapPercent);
+ else
+ flagB = 1;
+
+ if(flagA == 0 || flagB == 0)
+ {
+ counter++;
+ maskContig(i, 1);
+ }
+ else
+ {
+ if(flagA == 2) //us find longest path
+ {
+ process_us_contig(bal_i);
+ }
+
+ if(flagB == 2) //ds find longest path
+ {
+ process_ds_contig(i);
+ }
+ }
+
+ // counted even when the contig was masked because the other side returned 0
+ if(flagA == 2 || flagB == 2)
+ het_counter++;
+
+ if(isSmallerThanTwin(i))
+ i++;
+ }
+
+ printf("[%s]%d contigs masked from %d puzzles\n", __FUNCTION__, counter, puzzleCounter);
+ printf("[%s]%d processed as heterozygous .\n", __FUNCTION__, het_counter);
+ }
+
+
+ // Run the contig ordering pipeline on the current connection graph:
+ //   1. optional downSlide() pass (downS);
+ //   2. optional removal of connections below the weakPE weight (deWeak);
+ //   3. simplifyCnt(), maskRepeat(), simplifyCnt();
+ //   4. optional non-strict linearization (nonlinear);
+ //   5. maskPuzzle(2, 0) followed by freezing().
+ // 'infile' is not used by this function.
+ static void ordering(boolean deWeak, boolean downS, boolean nonlinear, char *infile)
+ {
+     if(downS)
+         downSlide();
+
+     if(deWeak)
+         deleteWeakCnt(weakPE);
+
+     simplifyCnt();
+     maskRepeat();
+     simplifyCnt();
+
+     if(nonlinear)
+         general_linearization(0);
+
+     maskPuzzle(2, 0);
+     freezing();
+ }
+
+ //check if contigs next to each other have reasonable overlap
+ // Works on the first nodeCounter entries of nodesInSubInOrder: copies each
+ // node's distance into nodeDistanceInOrder, then accumulates the overlap
+ // (negative gap) between consecutive nodes. Returns 0 as soon as the
+ // accumulated overlap divided by the summed contig length exceeds
+ // 'tolerance', 1 otherwise (also when the summed length is below 1).
+ boolean checkOverlapInBetween(double tolerance)
+ {
+     int k, gap;
+     int slot;
+     unsigned int node;
+     int total_len = 0;
+     int overlap_len = 0;
+
+     for(k = 0; k < nodeCounter; k++)
+     {
+         node = nodesInSubInOrder[k];
+         total_len += contig_array[node].length;
+         slot = getIndexInArray(node);
+         nodeDistanceInOrder[k] = nodeDistance[slot];
+     }
+
+     if(total_len < 1)
+         return 1;
+
+     for(k = 1; k < nodeCounter; k++)
+     {
+         gap = nodeDistanceInOrder[k] - nodeDistanceInOrder[k - 1]
+               - contig_array[nodesInSubInOrder[k]].length;
+
+         if(gap < 0)
+             overlap_len -= gap;
+
+         if((double)overlap_len / total_len > tolerance)
+             return 0;
+     }
+
+     return 1;
+ }
+
+
+/********* the following codes are for freezing current scaffolds ****************/
+ //set connections between contigs in a array to used or not
+ //meanwhile set mask to the opposite value
+ //   start     - contig the route starts from
+ //   array     - route: contigs following 'start', terminated by a 0 entry or by max_steps
+ //   max_steps - capacity of 'array'
+ //   flag      - value to store in the 'used' field
+ // Works in three passes over the route:
+ //   1. save the current 'used' value of each connection (and of its twin-strand
+ //      counterpart) in 'checking'; returns 1 at once if a connection already has
+ //      used == flag or is bound into a scaffold;
+ //   2. set 'used' to flag, stopping with usedFlag = 1 if a connection already
+ //      carries that value (it appears twice on the route);
+ //   3. if usedFlag is clear set mask to 1 - flag, otherwise restore 'used'
+ //      from 'checking'.
+ // Returns usedFlag: 1 when the route cannot be used, 0 on success.
+ //
+ // Fix: the third pass dereferenced the twin-strand connection without a NULL
+ // check, although the first two passes guard the very same lookup.
+ static boolean setUsed(unsigned int start, unsigned int *array, int max_steps, boolean flag)
+ {
+     unsigned int prevCtg;
+     unsigned int twinA, twinB;
+     int j;
+     CONNECT *cnt;
+     boolean usedFlag = 0;
+
+     // save 'used' to 'checking'
+     prevCtg = start;
+
+     for(j = 0; j < max_steps; j++)
+     {
+         if(array[j] == 0)
+             break;
+
+         cnt = getCntBetween(prevCtg, array[j]);
+
+         if(!cnt)
+         {
+             printf("setUsed: no connect between %d and %d\n", prevCtg, array[j]);
+             prevCtg = array[j];
+             continue;
+         }
+
+         if(cnt->used == flag || cnt->nextInScaf || cnt->prevInScaf || cnt->singleInScaf)
+         {
+             return 1;
+         }
+
+         cnt->checking = cnt->used;
+         twinA = getTwinCtg(prevCtg);
+         twinB = getTwinCtg(array[j]);
+         cnt = getCntBetween(twinB, twinA);
+
+         if(cnt)
+             cnt->checking = cnt->used;
+
+         prevCtg = array[j];
+     }
+
+     // set used to flag
+     prevCtg = start;
+
+     for(j = 0; j < max_steps; j++)
+     {
+         if(array[j] == 0)
+             break;
+
+         cnt = getCntBetween(prevCtg, array[j]);
+
+         if(!cnt)
+         {
+             prevCtg = array[j];
+             continue;
+         }
+
+         if(cnt->used == flag)
+         {
+             usedFlag = 1;
+             break;
+         }
+
+         cnt->used = flag;
+         twinA = getTwinCtg(prevCtg);
+         twinB = getTwinCtg(array[j]);
+         cnt = getCntBetween(twinB, twinA);
+
+         if(cnt)
+             cnt->used = flag;
+
+         prevCtg = array[j];
+     }
+
+     // set mask to 'NOT flag' or set used to original value
+     prevCtg = start;
+
+     for(j = 0; j < max_steps; j++)
+     {
+         if(array[j] == 0)
+             break;
+
+         cnt = getCntBetween(prevCtg, array[j]);
+
+         if(!cnt)
+         {
+             prevCtg = array[j];
+             continue;
+         }
+
+         if(!usedFlag)
+             cnt->mask = 1 - flag;
+         else
+             cnt->used = cnt->checking;
+
+         twinA = getTwinCtg(prevCtg);
+         twinB = getTwinCtg(array[j]);
+         cnt = getCntBetween(twinB, twinA);
+
+         // the twin-strand connection may be missing, as in the passes above
+         if(cnt)
+         {
+             // NOTE(review): on success this leaves the twin with used == 1 - flag while
+             // the forward connection keeps used == flag - kept as in the original, confirm.
+             cnt->used = 1 - flag;
+
+             if(!usedFlag)
+                 cnt->mask = 1 - flag;
+             else
+                 cnt->used = cnt->checking;
+         }
+
+         prevCtg = array[j];
+     }
+
+     return usedFlag;
+ }
+ // break down scaffolds poorly supported by longer PE
+ // For every scaffold (chain of connections bound via nextInScaf) this function lists its
+ // contigs, then for each pair of neighbouring contigs calls traceAlongMaskedCnt() to look for
+ // routes between them within a distance window. When exactly one route is found and setUsed()
+ // accepts it, the contigs on that route are inserted between the pair, the direct connection
+ // is deleted and the scaffold is re-bound with consolidate().
+ // Allocates so_far / found_routes for the duration of the call and resets the 'used' and
+ // 'checking' fields of all connections before returning.
+ static void recoverMask()
+ {
+ unsigned int i, ctg, bal_ctg, start, finish;
+ int num3, num5, j, t;
+ CONNECT *bindCnt, *cnt;
+ int min, max, max_steps = 5, num_route, length;
+ int tempCounter, recoverCounter = 0;
+ boolean multiUSE, change;
+
+ for(i = 1; i <= num_ctg; i++)
+ contig_array[i].flag = 0;
+
+ // route buffers: max_n_routes routes of at most max_steps contigs each
+ // (presumably filled by traceAlongMaskedCnt - its definition is not visible here)
+ so_far = (unsigned int *)ckalloc(max_n_routes * sizeof(unsigned int));
+ found_routes = (unsigned int **)ckalloc(max_n_routes * sizeof(unsigned int *));
+
+ for(j = 0; j < max_n_routes; j++)
+ found_routes[j] = (unsigned int *)ckalloc(max_steps * sizeof(unsigned int));
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect)
+ continue;
+
+ bindCnt = getBindCnt(i);
+
+ if(!bindCnt)
+ continue;
+
+ //first scan get the average coverage by longer pe
+ // collect the scaffold downstream of i into scaf5, flagging each contig and its twin
+ num5 = num3 = 0;
+ ctg = i;
+ *(unsigned int *)darrayPut(scaf5, num5++) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg(i)].flag = 1;
+
+ while(bindCnt)
+ {
+ if(bindCnt->used)
+ break;
+
+ setConnectUsed(ctg, bindCnt->contigID, 1);
+ ctg = bindCnt->contigID;
+ *(unsigned int *)darrayPut(scaf5, num5++) = ctg;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ // same walk starting from the twin of i; the twins of the visited contigs go into scaf3
+ ctg = getTwinCtg(i);
+ bindCnt = getBindCnt(ctg);
+
+ while(bindCnt)
+ {
+ if(bindCnt->used)
+ break;
+
+ setConnectUsed(ctg, bindCnt->contigID, 1);
+ ctg = bindCnt->contigID;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ *(unsigned int *)darrayPut(scaf3, num3++) = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ if(num5 + num3 < 2)
+ continue;
+
+ // tempArray = scaf3 reversed followed by scaf5, i.e. the whole scaffold in order
+ tempCounter = solidCounter = 0;
+
+ for(j = num3 - 1; j >= 0; j--)
+ *(unsigned int *)darrayPut(tempArray, tempCounter++) =
+ *(unsigned int *)darrayGet(scaf3, j);
+
+ for(j = 0; j < num5; j++)
+ *(unsigned int *)darrayPut(tempArray, tempCounter++) =
+ *(unsigned int *)darrayGet(scaf5, j);
+
+ change = 0;
+
+ // rebuild the scaffold in solidArray, examining the gap after each contig
+ for(t = 0; t < tempCounter - 1; t++)
+ {
+ *(unsigned int *)darrayPut(solidArray, solidCounter++) =
+ *(unsigned int *)darrayGet(tempArray, t);
+ start = *(unsigned int *)darrayGet(tempArray, t);
+ finish = *(unsigned int *)darrayGet(tempArray, t + 1);
+ num_route = num_trace = 0;
+ cnt = checkConnect(start, finish);
+
+ if(!cnt)
+ {
+ printf("Warning from recoverMask: no connection (%d %d), start at %d\n",
+ start, finish, i);
+ cnt = getCntBetween(start, finish);
+
+ if(cnt)
+ debugging1(start, finish);
+
+ continue;
+ }
+
+ // accepted route length: distance to the end of 'finish' +/- 1.5 * ins_size_var
+ length = cnt->gapLen + contig_array[finish].length;
+ min = length - 1.5 * ins_size_var;
+ max = length + 1.5 * ins_size_var;
+ traceAlongMaskedCnt(finish, start, max_steps, min, max, 0, 0, &num_route);
+
+ if(finish == start)
+ {
+ for(j = 0; j < tempCounter; j++)
+ printf("->%d", *(unsigned int *)darrayGet(tempArray, j));
+
+ printf(": start at %d\n", i);
+ }
+
+ // only an unambiguous (single) route is used
+ if(num_route == 1)
+ {
+ for(j = 0; j < max_steps; j++)
+ if(found_routes[0][j] == 0)
+ break;
+
+ if(j < 1)
+ continue;
+
+ //check if connects have been used more than once
+ multiUSE = setUsed(start, found_routes[0], max_steps, 1);
+
+ if(multiUSE)
+ continue;
+
+ // insert the route's contigs except its last entry (the entry followed by 0 or
+ // sitting in the final slot is skipped)
+ for(j = 0; j < max_steps; j++)
+ {
+ if(j + 1 == max_steps || found_routes[0][j + 1] == 0)
+ break;
+
+ *(unsigned int *)darrayPut(solidArray, solidCounter++) = found_routes[0][j];
+ contig_array[found_routes[0][j]].flag = 1;
+ contig_array[getTwinCtg(found_routes[0][j])].flag = 1;
+ }
+
+ recoverCounter += j;
+ // the direct start->finish connection is deleted and its bindings cleared
+ setConnectDelete(start, finish, 1, 1);
+ change = 1;
+ } //end if num_route=1
+ } // for each gap
+
+ *(unsigned int *)darrayPut(solidArray, solidCounter++) =
+ *(unsigned int *)darrayGet(tempArray, tempCounter - 1);
+
+ if(change)
+ consolidate();
+ }
+
+ //printf("%d contigs recovered\n",recoverCounter);
+ fflush(stdout);
+
+ // reset the bookkeeping flags on all connections
+ for(i = 1; i <= num_ctg; i++)
+ {
+ cnt = contig_array[i].downwardConnect;
+
+ while(cnt)
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt = cnt->next;
+ }
+ }
+
+ for(j = 0; j < max_n_routes; j++)
+ free((void *)found_routes[j]);
+
+ free((void *)found_routes);
+ free((void *)so_far);
+ }
+
+
+ // A -> B -> C -> D un-bind link B->C to link A->B and B->C
+ // A' <- B' <- C' <- D'
+ // Detach the connection CB -> CC (and the connection between the twins,
+ // CC' -> CB') from its neighbours in the scaffold: the singleInScaf flags are
+ // cleared, and any nextInScaf / prevInScaf binding that ties the link to the
+ // following or preceding connection is removed on both strands.
+ // Does nothing when CB -> CC does not exist; stops after clearing its
+ // singleInScaf flag when the twin-strand connection does not exist.
+ static void unBindLink(unsigned int CB, unsigned int CC)
+ {
+     CONNECT *fwd, *rev, *mirror;
+     unsigned int neighbour;
+
+     fwd = getCntBetween(CB, CC);
+
+     if(!fwd)
+         return;
+
+     fwd->singleInScaf = 0;
+     rev = getCntBetween(getTwinCtg(CC), getTwinCtg(CB));
+
+     if(!rev)
+         return;
+
+     rev->singleInScaf = 0;
+
+     // cut B->C from C->D (and D'->C' from C'->B')
+     if(fwd->nextInScaf)
+     {
+         neighbour = fwd->nextInScaf->contigID;
+         fwd->nextInScaf->prevInScaf = 0;
+         fwd->nextInScaf = NULL;
+         mirror = getCntBetween(getTwinCtg(neighbour), getTwinCtg(CC));
+
+         if(mirror)
+             mirror->nextInScaf = NULL;
+
+         rev->prevInScaf = 0;
+     }
+
+     // cut C'->B' from B'->A' (and A->B from B->C)
+     if(rev->nextInScaf)
+     {
+         neighbour = rev->nextInScaf->contigID;
+         rev->nextInScaf->prevInScaf = 0;
+         rev->nextInScaf = NULL;
+         mirror = getCntBetween(getTwinCtg(neighbour), CB);
+
+         if(mirror)
+             mirror->nextInScaf = NULL;
+
+         fwd->prevInScaf = 0;
+     }
+ }
+
+ // Freeze the current scaffolds: starting from each unflagged, unmasked contig that has a
+ // valid connection, follow getNextContig() in both directions to list the contigs of its
+ // scaffold, record for every contig the first/last unmasked contig of the scaffold
+ // (from_vt / to_vt) and its position (indexInScaf), and bind the connections with
+ // consolidate(). Contigs that end up in no scaffold get from_vt = to_vt = themselves.
+ static void freezing()
+ {
+ int num5, num3;
+ unsigned int ctg, bal_ctg;
+ unsigned int i;
+ int j, t;
+ CONNECT *cnt, *prevCNT, *nextCnt;
+ boolean excep;
+
+ // reset per-contig and per-connection bookkeeping
+ for(i = 1; i <= num_ctg; i++)
+ {
+ contig_array[i].flag = 0;
+ contig_array[i].from_vt = 0;
+ contig_array[i].to_vt = 0;
+ cnt = contig_array[i].downwardConnect;
+
+ while(cnt)
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt->singleInScaf = 0;
+ cnt = cnt->next;
+ }
+ }
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].flag || contig_array[i].mask)
+ continue;
+
+ if(!contig_array[i].downwardConnect || !validConnect(i, NULL))
+ {
+ continue;
+ }
+
+ // walk downstream from i, collecting contigs into scaf5
+ num5 = num3 = 0;
+ ctg = i;
+ *(unsigned int *)darrayPut(scaf5, num5++) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg(i)].flag = 1;
+ prevCNT = NULL;
+ cnt = getNextContig(ctg, prevCNT, &excep);
+
+ while(cnt)
+ {
+ // the next contig was already visited: cut the link instead of following it
+ if(contig_array[cnt->contigID].flag)
+ {
+ unBindLink(ctg, cnt->contigID);
+ break;
+ }
+
+ nextCnt = getNextContig(cnt->contigID, cnt, &excep);
+ setConnectUsed(ctg, cnt->contigID, 1);
+ ctg = cnt->contigID;
+ *(unsigned int *)darrayPut(scaf5, num5++) = ctg;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ // walk downstream from the twin of i (i.e. upstream of i); twins of the visited
+ // contigs go into scaf3
+ ctg = getTwinCtg(i);
+
+ // when a downstream neighbour exists, the connection arriving at twin(i) from that
+ // neighbour's twin serves as the previous connection for the walk
+ if(num5 >= 2)
+ prevCNT = checkConnect(getTwinCtg(*(unsigned int *)darrayGet(scaf5, 1)), ctg);
+ else
+ prevCNT = NULL;
+
+ cnt = getNextContig(ctg, prevCNT, &excep);
+
+ while(cnt)
+ {
+ if(contig_array[cnt->contigID].flag)
+ {
+ unBindLink(ctg, cnt->contigID);
+ break;
+ }
+
+ nextCnt = getNextContig(cnt->contigID, cnt, &excep);
+ setConnectUsed(ctg, cnt->contigID, 1);
+ ctg = cnt->contigID;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ *(unsigned int *)darrayPut(scaf3, num3++) = bal_ctg;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ if(num5 + num3 < 2)
+ continue;
+
+ // solidArray = scaf3 reversed followed by scaf5, i.e. the whole scaffold in order
+ solidCounter = 0;
+
+ for(j = num3 - 1; j >= 0; j--)
+ *(unsigned int *)darrayPut(solidArray, solidCounter++) =
+ *(unsigned int *)darrayGet(scaf3, j);
+
+ for(j = 0; j < num5; j++)
+ *(unsigned int *)darrayPut(solidArray, solidCounter++) =
+ *(unsigned int *)darrayGet(scaf5, j);
+
+ unsigned int firstCtg = 0;
+ unsigned int lastCtg = 0;
+ unsigned int firstTwin = 0;
+ unsigned int lastTwin = 0;
+
+ // first and last unmasked contig of the scaffold
+ for(t = 0; t < solidCounter; t++)
+ if(!contig_array[*(unsigned int *)darrayGet(solidArray, t)].mask)
+ {
+ firstCtg = *(unsigned int *)darrayGet(solidArray, t);
+ break;
+ }
+
+ for(t = solidCounter - 1; t >= 0; t--)
+ if(!contig_array[*(unsigned int *)darrayGet(solidArray, t)].mask)
+ {
+ lastCtg = *(unsigned int *)darrayGet(solidArray, t);
+ break;
+ }
+
+ if(firstCtg == 0 || lastCtg == 0)
+ {
+ printf("scaffold start at %d, stop at %d, freezing began with %d\n", firstCtg, lastCtg, i);
+
+ for(j = 0; j < solidCounter; j++)
+ printf("->%d(%d %d)", *(unsigned int *)darrayGet(solidArray, j)
+ , contig_array[*(unsigned int *)darrayGet(solidArray, j)].mask
+ , contig_array[*(unsigned int *)darrayGet(solidArray, j)].flag);
+
+ printf("\n");
+ }
+ else
+ {
+ firstTwin = getTwinCtg(firstCtg);
+ lastTwin = getTwinCtg(lastCtg);
+ }
+
+ for(t = 0; t < solidCounter; t++)
+ {
+ // note: this 'ctg' shadows the function-level variable of the same name
+ unsigned int ctg = *(unsigned int *)darrayGet(solidArray, t);
+
+ // a contig that already has from_vt set was assigned before: mask it as a repeat
+ if(contig_array[ctg].from_vt > 0)
+ {
+ contig_array[ctg].mask = 1;
+ contig_array[getTwinCtg(ctg)].mask = 1;
+ printf("Repeat: contig %d (%d) appears more than once\n", ctg, getTwinCtg(ctg));
+ }
+ else
+ {
+ // the twin sees the scaffold reversed: swapped ends and mirrored index
+ contig_array[ctg].from_vt = firstCtg;
+ contig_array[ctg].to_vt = lastCtg;
+ contig_array[ctg].indexInScaf = t + 1;
+ contig_array[getTwinCtg(ctg)].from_vt = lastTwin;
+ contig_array[getTwinCtg(ctg)].to_vt = firstTwin;
+ contig_array[getTwinCtg(ctg)].indexInScaf = solidCounter - t;
+ }
+ }
+
+ consolidate();
+ }
+
+ //printf("Freezing is done....\n");
+ fflush(stdout);
+
+ // clear the working flags; contigs outside any scaffold become their own scaffold ends
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].flag)
+ contig_array[i].flag = 0;
+
+ if(contig_array[i].from_vt == 0)
+ {
+ contig_array[i].from_vt = i;
+ contig_array[i].to_vt = i;
+ }
+
+ cnt = contig_array[i].downwardConnect;
+
+ while(cnt)
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt = cnt->next;
+ }
+ }
+
+ }
+
+/************** codes below this line are for pulling the scaffolds out ************/
+ // Write one "GAP <len> <seg>" line to 'fo' for the route stored in
+ // found_routes[0]: <seg> is the number of route entries that are followed by
+ // a non-zero entry (looking at no more than max_steps entries), <len> the
+ // summed length of those contigs, followed by the contig ids themselves.
+ void output1gap(FILE *fo, int max_steps)
+ {
+     int k;
+     int len = 0;
+     int seg = 0;
+
+     // count the segments and sum their lengths
+     while(seg < max_steps - 1 && found_routes[0][seg + 1] != 0)
+     {
+         len += contig_array[found_routes[0][seg]].length;
+         seg++;
+     }
+
+     fprintf(fo, "GAP %d %d", len, seg);
+
+     for(k = 0; k < seg; k++)
+         fprintf(fo, " %d", found_routes[0][k]);
+
+     fprintf(fo, "\n");
+ }
+
+ // number of weak points reported by printCnts()
+ static int weakCounter;
+
+ // Print every connection of the list starting at 'cnt' that has a non-zero
+ // weight and is not inherited, as "index:weight:gapLen ". 'label' is written
+ // once, before the first connection printed; nothing is written when no
+ // connection qualifies.
+ static void printCntsOneSide(FILE *fp, CONNECT *cnt, const char *label)
+ {
+     boolean labelled = 0;
+     unsigned int linkCtg;
+
+     for(; cnt; cnt = cnt->next)
+     {
+         if(!cnt->weight || cnt->inherit)
+             continue;
+
+         if(!labelled)
+         {
+             labelled = 1;
+             fprintf(fp, "%s", label);
+         }
+
+         linkCtg = cnt->contigID;
+
+         // always report the smaller contig of a twin pair
+         if(isLargerThanTwin(linkCtg))
+             linkCtg = getTwinCtg(linkCtg);
+
+         fprintf(fp, "%d:%d:%d ", index_array[linkCtg], cnt->weight, cnt->gapLen);
+     }
+ }
+
+ // Append the connection summary of contig 'ctg' to the current line of 'fp':
+ // an optional "WP" marker when its bind connection is a weak point set by a
+ // small insert library, the downstream connections ("#DOWN") and the
+ // connections of its twin ("#UP"), then a newline.
+ // Returns 1 when the weak-point marker was written, 0 otherwise. Nothing is
+ // written (not even the newline) for a contig that is its own twin.
+ static boolean printCnts(FILE *fp, unsigned int ctg)
+ {
+     CONNECT *downList = contig_array[ctg].downwardConnect;
+     boolean ret = 0;
+     unsigned int bal_ctg = getTwinCtg(ctg);
+     CONNECT *bindCnt;
+
+     if(isSameAsTwin(ctg))
+         return ret;
+
+     bindCnt = getBindCnt(ctg);
+
+     if(bindCnt && bindCnt->bySmall && bindCnt->weakPoint)
+     {
+         weakCounter++;
+         fprintf(fp, "\tWP");
+         ret = 1;
+     }
+
+     printCntsOneSide(fp, downList, "\t#DOWN ");
+     printCntsOneSide(fp, contig_array[bal_ctg].downwardConnect, "\t#UP ");
+
+     fprintf(fp, "\n");
+     return ret;
+ }
+
+/*
+ * Write one contig of a scaffold: its line in the .scaf file (fp), an
+ * optional "GAP" record and its line in the .scaf_gap file (fo).
+ *
+ * ctg      - contig to output
+ * prev_ctg - contig written just before it in this scaffold, 0 for the first
+ * len      - offset of ctg inside the scaffold
+ * gap      - non-negative gap estimate between prev_ctg and ctg
+ *
+ * When prev_ctg is set, traceAlongArc() is asked for routes between the two
+ * contigs within gap +/- ins_size_var; a GAP record is written only when
+ * exactly one route is found.
+ */
+static void scaffolding_printCtg(FILE *fp, FILE *fo, unsigned int ctg, unsigned int prev_ctg,
+        int len, int gap, unsigned int max_steps)
+{
+    int num_route;
+
+    if(!isLargerThanTwin(ctg))
+    {
+        fprintf(fp, "%-10d %-10d + %d "
+                , index_array[ctg], len
+                , contig_array[ctg].length + overlaplen);
+    }
+    else
+    {
+        fprintf(fp, "%-10d %-10d - %d "
+                , index_array[getTwinCtg(ctg)], len
+                , contig_array[ctg].length + overlaplen);
+    }
+
+    /* finishes the .scaf line; the weak-point result is not needed here */
+    printCnts(fp, ctg);
+
+    if(prev_ctg)
+    {
+        num_route = num_trace = 0;
+        traceAlongArc(ctg, prev_ctg, max_steps
+                , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route);
+
+        if(num_route == 1)
+            output1gap(fo, max_steps);
+    }
+
+    fprintf(fo, "%-10d %-10d\n", ctg, len);
+}
+
+/*
+ * Pull the scaffolds out of the connection graph and write them to
+ * "<outfile>.scaf" and "<outfile>.scaf_gap".
+ *
+ * len_cut - a contig seeds a scaffold (or counts as a singleton) only if
+ *           its length plus overlaplen reaches this value
+ * outfile - prefix of the two output files
+ *
+ * Steps:
+ *   1. invert index_array so that index_array[i] holds the original index
+ *      of contig i;
+ *   2. for every unflagged, unmasked seed contig with a valid connection,
+ *      extend along getNextContig() on the seed strand (scaf5/gap5) and on
+ *      the twin strand (scaf3/gap3), flagging every contig used;
+ *   3. print each scaffold, 3' part reversed followed by the 5' part;
+ *   4. add the unflagged long contigs as singletons and print N50/N90.
+ *
+ * File names are built with snprintf(), so an over-long prefix is truncated
+ * instead of overflowing the 256-byte name buffer.  The gap estimate is kept
+ * in an int, so gaps above 32767 are no longer truncated.
+ */
+void scaffolding(unsigned int len_cut, char *outfile)
+{
+    unsigned int prev_ctg, ctg, bal_ctg, *length_array, count = 0;
+    unsigned int i, max_steps = 5;
+    int num5, num3, j, len, flag;
+    int gap = 0;
+    long long sum = 0, N50, N90;
+    FILE *fp, *fo = NULL;
+    char name[256];
+    CONNECT *cnt, *prevCNT, *nextCnt;
+    boolean excep;
+    weakCounter = 0;
+
+    so_far = (unsigned int *)ckalloc(max_n_routes * sizeof(unsigned int));
+    found_routes = (unsigned int **)ckalloc(max_n_routes * sizeof(unsigned int *));
+
+    for(j = 0; j < max_n_routes; j++)
+        found_routes[j] = (unsigned int *)ckalloc(max_steps * sizeof(unsigned int));
+
+    length_array = (unsigned int *)ckalloc((num_ctg + 1) * sizeof(unsigned int));
+
+    //use length_array as scratch space to invert index_array
+    for(i = 1; i <= num_ctg; i++)
+        length_array[i] = 0;
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        if(index_array[i] > 0)
+            length_array[index_array[i]] = i;
+    }
+
+    for(i = 1; i <= num_ctg; i++)
+        index_array[i] = length_array[i]; //contig i with original index: index_array[i]
+
+    orig2new = 0;
+
+    snprintf(name, sizeof(name), "%s.scaf", outfile);
+    fp = ckopen(name, "w");
+    snprintf(name, sizeof(name), "%s.scaf_gap", outfile);
+    fo = ckopen(name, "w");
+
+    scaf3 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    scaf5 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    gap3 = (DARRAY *)createDarray(1000, sizeof(int));
+    gap5 = (DARRAY *)createDarray(1000, sizeof(int));
+
+    for(i = 1; i <= num_ctg; i++)
+        contig_array[i].flag = 0;
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        if(contig_array[i].length + (unsigned int)overlaplen < len_cut)
+            continue;
+
+        if(contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect || !validConnect(i, NULL))
+            continue;
+
+        //extend from the seed along its own strand
+        num5 = num3 = 0;
+        ctg = i;
+        *(unsigned int *)darrayPut(scaf5, num5++) = i;
+        contig_array[i].flag = 1;
+        bal_ctg = getTwinCtg(ctg);
+        contig_array[bal_ctg].flag = 1;
+        len = contig_array[i].length;
+        prevCNT = NULL;
+        cnt = getNextContig(ctg, prevCNT, &excep);
+
+        while(cnt)
+        {
+            nextCnt = getNextContig(cnt->contigID, cnt, &excep);
+
+            if(excep && prevCNT)
+                printf("scaffolding: exception --- prev cnt from %u\n", prevCNT->contigID);
+
+            if(nextCnt && nextCnt->used)
+                break;
+
+            setConnectUsed(ctg, cnt->contigID, 1);
+            *(int *)darrayPut(gap5, num5 - 1) = cnt->gapLen;
+            ctg = cnt->contigID;
+            *(unsigned int *)darrayPut(scaf5, num5++) = ctg;
+            len += cnt->gapLen + contig_array[ctg].length;
+            bal_ctg = getTwinCtg(ctg);
+            contig_array[ctg].flag = 1;
+            contig_array[bal_ctg].flag = 1;
+            prevCNT = cnt;
+            cnt = nextCnt;
+        }
+
+        //extend from the twin of the seed, i.e. in the opposite direction
+        ctg = getTwinCtg(i);
+
+        if(num5 >= 2)
+            prevCNT = checkConnect(getTwinCtg(*(unsigned int *)darrayGet(scaf5, 1)), ctg);
+        else
+            prevCNT = NULL;
+
+        cnt = getNextContig(ctg, prevCNT, &excep);
+
+        while(cnt)
+        {
+            nextCnt = getNextContig(cnt->contigID, cnt, &excep);
+
+            if(excep && prevCNT)
+                printf("scaffolding: exception -- prev cnt from %u\n", prevCNT->contigID);
+
+            if(nextCnt && nextCnt->used)
+                break;
+
+            setConnectUsed(ctg, cnt->contigID, 1);
+            ctg = cnt->contigID;
+            len += cnt->gapLen + contig_array[ctg].length;
+            bal_ctg = getTwinCtg(ctg);
+            contig_array[ctg].flag = 1;
+            contig_array[bal_ctg].flag = 1;
+            *(int *)darrayPut(gap3, num3) = cnt->gapLen;
+            *(unsigned int *)darrayPut(scaf3, num3++) = bal_ctg;
+            prevCNT = cnt;
+            cnt = nextCnt;
+        }
+
+        len += overlaplen;
+        sum += len;
+        length_array[count++] = len;
+
+        fprintf(fp, ">scaffold%d %d %d\n", count, num5 + num3, len);
+        fprintf(fo, ">scaffold%d %d %d\n", count, num5 + num3, len);
+        len = prev_ctg = 0;
+
+        //the 3' part was collected outwards from the seed, so print it reversed
+        for(j = num3 - 1; j >= 0; j--)
+        {
+            ctg = *(unsigned int *)darrayGet(scaf3, j);
+            scaffolding_printCtg(fp, fo, ctg, prev_ctg, len, gap, max_steps);
+
+            len += contig_array[ctg].length + *(int *)darrayGet(gap3, j);
+            prev_ctg = ctg;
+            gap = *(int *)darrayGet(gap3, j) > 0 ? *(int *)darrayGet(gap3, j) : 0;
+        }
+
+        for(j = 0; j < num5; j++)
+        {
+            ctg = *(unsigned int *)darrayGet(scaf5, j);
+            scaffolding_printCtg(fp, fo, ctg, prev_ctg, len, gap, max_steps);
+
+            //gap5 holds one entry less than scaf5
+            if(j < num5 - 1)
+            {
+                len += contig_array[ctg].length + *(int *)darrayGet(gap5, j);
+                prev_ctg = ctg;
+                gap = *(int *)darrayGet(gap5, j) > 0 ? *(int *)darrayGet(gap5, j) : 0;
+            }
+        }
+    }
+
+    freeDarray(scaf3);
+    freeDarray(scaf5);
+    freeDarray(gap3);
+    freeDarray(gap5);
+
+    fclose(fp);
+    fclose(fo);
+    printf("[%s]scaffold(s) created : %d , total length : %lld.\n", __FUNCTION__, count , sum);
+
+    //add the singletons: long contigs that ended up in no scaffold
+    for(i = 1; i <= num_ctg; i++)
+    {
+        if(contig_array[i].length + (unsigned int)overlaplen < len_cut || contig_array[i].flag)
+            continue;
+
+        length_array[count++] = contig_array[i].length;
+        sum += contig_array[i].length;
+
+        //count a contig and its twin only once
+        if(isSmallerThanTwin(i))
+            i++;
+    }
+
+    // calculate N50/N90 by accumulating lengths from the longest downwards
+    printf("[%s]total number of scaffold(s) and singleton(s) : %d, total length : %lld.\n", __FUNCTION__, count, sum);
+    qsort(length_array, count, sizeof(length_array[0]), cmp_int);
+    N50 = sum * 0.5;
+    N90 = sum * 0.9;
+    sum = flag = 0;
+
+    for(j = count - 1; j >= 0; j--)
+    {
+        sum += length_array[j];
+
+        if(!flag && sum >= N50)
+        {
+            printf("[%s]N50 : %d bp, ", __FUNCTION__, length_array[j]);
+            flag++;
+        }
+
+        if(sum >= N90)
+        {
+            printf(" N90 : %d bp\n", length_array[j]);
+            break;
+        }
+    }
+
+    fflush(stdout);
+    free((void *)length_array);
+
+    for(j = 0; j < max_n_routes; j++)
+        free((void *)found_routes[j]);
+
+    free((void *)found_routes);
+    free((void *)so_far);
+}
+
+
+/*
+ * Dump every connection with a weight of at least 1 to fp, one per line:
+ * "<from> <to>\t<gapLen>\t<weight>\t<insertS>".
+ *
+ * After a connection is written its weight is reset to 0, and so is the
+ * weight of the mirror connection twin(to) -> twin(from) if there is one,
+ * so each pair of contigs is reported once.
+ */
+void outputLinks(FILE *fp, int insertS)
+{
+    unsigned int from, twinFrom, twinTo;
+    CONNECT *link, *mirror;
+
+    for(from = 1; from <= num_ctg; from++)
+    {
+        twinFrom = getTwinCtg(from);
+
+        for(link = contig_array[from].downwardConnect; link; link = link->next)
+        {
+            if(link->weight < 1)
+                continue;
+
+            fprintf(fp, "%-10d %-10d\t%d\t%d\t%d\n"
+                    , from, link->contigID, link->gapLen, link->weight, insertS);
+            link->weight = 0;
+
+            twinTo = getTwinCtg(link->contigID);
+            mirror = getCntBetween(twinTo, twinFrom);
+
+            if(mirror)
+                mirror->weight = 0;
+        }
+    }
+}
+
+/*
+ * Turn paired-end information into link records, one insert-size grade at
+ * a time, in the order of the grades in pes[].
+ *
+ * Reads "<infile>.readOnContig" and writes "<infile>.links".  For every
+ * grade, fresh connection storage is created, connectByPE_grad() builds
+ * the connections, outputLinks() dumps them if any were made, and the
+ * storage is torn down again before the next grade.
+ *
+ * File names are built with snprintf(), so an over-long prefix is truncated
+ * instead of overflowing the 256-byte name buffer.
+ */
+void PE2Links(char *infile)
+{
+    char name[256], *line;
+    FILE *fp, *linkF;
+    int i;
+    int flag = 0;
+    unsigned int j;
+
+    fprintf(stderr, "[%s]entering this function.\n", __FUNCTION__);
+
+    snprintf(name, sizeof(name), "%s.links", infile);
+    linkF = ckopen(name, "w");
+
+    if(!pes)
+        loadPEgrads(infile);
+
+    snprintf(name, sizeof(name), "%s.readOnContig", infile);
+    fp = ckopen(name, "r");
+
+    lineLen = 1024;
+    line = (char *)ckalloc(lineLen * sizeof(char));
+
+    //read and discard the first line; the result is ignored on purpose
+    //because the buffer is emptied right away
+    fgets(line, lineLen, fp);
+    line[0] = '\0';
+
+    for(i = 0; i < gradsCounter; i++)
+    {
+        createCntMemManager();
+        createCntLookupTable();
+
+        newCntCounter = 0;
+        flag += connectByPE_grad(fp, i, line);
+
+        //only grades that produced connections are written out
+        if(flag)
+        {
+            flag = 0;
+            outputLinks(linkF, pes[i].insertS);
+        }
+
+        destroyConnectMem();
+        deleteCntLookupTable();
+
+        for(j = 1; j <= num_ctg; j++)
+            contig_array[j].downwardConnect = NULL;
+    }
+
+    free((void *)line);
+    fclose(fp);
+    fclose(linkF);
+    printf("[%s]all PEs attached\n", __FUNCTION__);
+}
+
+/*
+ * Parse one link record "<ctg> <toCtg> <gap> <weight> <insertSize>".
+ * Returns 1 when all five fields were read and both contig ids lie in
+ * 1..num_ctg, 0 otherwise.
+ */
+static int inputLinks_parse(const char *line, unsigned int *ctg, unsigned int *toCtg,
+        int *gap, int *wt, int *ins)
+{
+    if(sscanf(line, "%u %u %d %d %d", ctg, toCtg, gap, wt, ins) != 5)
+        return 0;
+
+    return *ctg >= 1 && *ctg <= num_ctg && *toCtg >= 1 && *toCtg <= num_ctg;
+}
+
+/*
+ * Load from fp all link records that belong to insert size insertS and add
+ * them (and their twin-strand mirrors) as connections.
+ *
+ * line is a buffer of lineLen bytes shared between calls.  On entry it may
+ * hold a record left over from the previous call; on return it holds the
+ * first record whose insert size differs from insertS, if any.
+ * For insert sizes above 1000, a link between two contigs of the same
+ * scaffold, in scaffold order, is also passed to add1LongPEcov().
+ *
+ * Records that cannot be parsed, or that name a contig outside 1..num_ctg,
+ * are skipped rather than used with stale or out-of-range values.
+ *
+ * Returns the number of links added.
+ */
+int inputLinks(FILE *fp, int insertS, char *line)
+{
+    unsigned int ctg, bal_ctg, toCtg, bal_toCtg;
+    int gap, wt, ins;
+    unsigned int counter = 0;
+
+    //record carried over from the previous call
+    if(line[0] != '\0' && inputLinks_parse(line, &ctg, &toCtg, &gap, &wt, &ins))
+    {
+        if(ins != insertS)
+            return counter;
+
+        bal_ctg = getTwinCtg(ctg);
+        bal_toCtg = getTwinCtg(toCtg);
+        add1Connect(ctg, toCtg, gap, wt, 0);
+        add1Connect(bal_toCtg, bal_ctg, gap, wt, 0);
+        counter++;
+
+        if(insertS > 1000 &&
+                contig_array[ctg].from_vt == contig_array[toCtg].from_vt && // on the same scaff
+                contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf)
+        {
+            add1LongPEcov(ctg, toCtg, wt);
+        }
+    }
+
+    while(fgets(line, lineLen, fp) != NULL)
+    {
+        if(!inputLinks_parse(line, &ctg, &toCtg, &gap, &wt, &ins))
+            continue;
+
+        //leave the record in line for the call that handles its insert size
+        if(ins != insertS)
+            break;
+
+        if(insertS > 1000 &&
+                contig_array[ctg].from_vt == contig_array[toCtg].from_vt && // on the same scaff
+                contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf)
+        {
+            add1LongPEcov(ctg, toCtg, wt);
+        }
+
+        bal_ctg = getTwinCtg(ctg);
+        bal_toCtg = getTwinCtg(toCtg);
+        add1Connect(ctg, toCtg, gap, wt, 0);
+        add1Connect(bal_toCtg, bal_ctg, gap, wt, 0);
+        counter++;
+    }
+
+    return counter;
+}
+/*
+ * Build scaffolds from "<infile>.links", using the link information in the
+ * order of the grades in pes[].
+ *
+ * Links are loaded grade by grade with inputLinks().  weakPE starts at 3 and
+ * is replaced by the running sum of pair_num_cut over the grades that
+ * contributed links.  Whenever the last grade of a rank has been loaded
+ * (and at least one link was read since the previous ordering), ordering()
+ * is run; on the very last grade it runs in non-linear mode, preceded by
+ * detectBreakScaf() if a grade with insert size <= 1000 has been ordered,
+ * and followed by recoverMask().
+ *
+ * The file name is built with snprintf(), so an over-long prefix is
+ * truncated instead of overflowing the 256-byte name buffer.
+ */
+void Links2Scaf(char *infile)
+{
+    char name[256], *line;
+    FILE *fp;
+    int i, cutoff_sum = 0;
+    int flag = 0, flag2;
+    boolean downS, nonLinear = 0, smallPE = 0, isPrevSmall = 0;
+
+    if(!pes)
+        loadPEgrads(infile);
+
+    snprintf(name, sizeof(name), "%s.links", infile);
+    fp = ckopen(name, "r");
+
+    createCntMemManager();
+    createCntLookupTable();
+
+    lineLen = 1024;
+    line = (char *)ckalloc(lineLen * sizeof(char));
+
+    //read and discard the first line; the result is ignored on purpose
+    //because the buffer is emptied right away
+    fgets(line, lineLen, fp);
+    line[0] = '\0';
+
+    solidArray = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    tempArray = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    scaf3 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    scaf5 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    gap3 = (DARRAY *)createDarray(1000, sizeof(int));
+    gap5 = (DARRAY *)createDarray(1000, sizeof(int));
+
+    weakPE = 3; //0531
+
+    for(i = 0; i < gradsCounter; i++)
+    {
+        flag2 = inputLinks(fp, pes[i].insertS, line);
+
+        if(flag2)
+        {
+            cutoff_sum += pes[i].pair_num_cut;
+            weakPE = cutoff_sum;
+        }
+
+        flag += flag2;
+
+        //nothing loaded since the last ordering: nothing to order
+        if(!flag)
+            continue;
+
+        //order only once all grades of the current rank are loaded
+        if(i == gradsCounter - 1 || pes[i + 1].rank != pes[i].rank)
+        {
+            flag = nonLinear = downS = 0;
+
+            if(pes[i].insertS > 1000 && pes[i].rank > 1)
+                downS = 1;
+
+            if(pes[i].insertS <= 1000)
+                smallPE = 1;
+
+            if(pes[i].insertS >= 1000)
+                ins_size_var = 50;
+            else if(pes[i].insertS >= 300)
+                ins_size_var = 30;
+            else
+                ins_size_var = 20;
+
+            printf("[%s]weight threshold for a connection in grad %d : %d, %d.\n", __FUNCTION__, i, weakPE, cutoff_sum);
+
+            if(i == gradsCounter - 1)
+                nonLinear = 1;
+
+            if(i == gradsCounter - 1 && !isPrevSmall && smallPE)
+                detectBreakScaf();
+
+            ordering(1, downS, nonLinear, infile);
+
+            if(i == gradsCounter - 1)
+                recoverMask();
+        }
+    }
+
+    freeDarray(tempArray);
+    freeDarray(solidArray);
+    freeDarray(scaf3);
+    freeDarray(scaf5);
+    freeDarray(gap3);
+    freeDarray(gap5);
+
+    free((void *)line);
+    fclose(fp);
+}
+/* The code below picks up a general subgraph in which at most one node has upstream
+ connections to the rest of the graph and at most one node has downstream connections. */
+
+// static int nodeCounter
+/*
+ * Record contig node, at distance dis, in slot nodeCounter of ctg4heapArray
+ * and mark it as part of the sub-graph.
+ *
+ * Returns 1 if the node is in the sub-graph afterwards (including the case
+ * where it already was, in which case nothing is written), and 0 if the
+ * slot index exceeds maxNodes or the twin of node is already in the
+ * sub-graph.
+ */
+static boolean putNodeInArray(unsigned int node, int maxNodes, int dis)
+{
+    CTGinHEAP *slot;
+    int slotIdx;
+
+    if(contig_array[node].inSubGraph)
+        return 1;
+
+    slotIdx = nodeCounter;
+
+    if(slotIdx > maxNodes || contig_array[getTwinCtg(node)].inSubGraph)
+        return 0;
+
+    slot = &ctg4heapArray[slotIdx];
+    slot->ctgID = node;
+    slot->dis = dis;
+    contig_array[node].inSubGraph = 1;
+
+    //a fresh node is open in every direction for both heaps
+    slot->ds_shut4dheap = 0;
+    slot->us_shut4dheap = 0;
+    slot->ds_shut4uheap = 0;
+    slot->us_shut4uheap = 0;
+
+    return 1;
+}
+
+/*
+ * Set the inSubGraph mark of every contig recorded in
+ * ctg4heapArray[1..nodeCounter] to flag.  nodeCounter is first clamped to
+ * MaxNodeInSub; slots whose contig id is not positive are skipped.
+ */
+static void setInGraph(boolean flag)
+{
+    int slot;
+    int ctgID;
+
+    if(nodeCounter > MaxNodeInSub)
+        nodeCounter = MaxNodeInSub;
+
+    for(slot = 1; slot <= nodeCounter; slot++)
+    {
+        ctgID = ctg4heapArray[slot].ctgID;
+
+        if(ctgID > 0)
+            contig_array[ctgID].inSubGraph = flag;
+    }
+}
+
+/*
+ * Add tempNode to the sub-graph and queue it on the heap that matches the
+ * sign of its distance dis: dheap (key dis) for dis >= 0, uheap (key -dis)
+ * otherwise.  nodeCounter is advanced first and names the slot used.
+ *
+ * *DmaxDis is raised to dis for a downstream node; *UmaxDis is raised to
+ * -dis plus the contig length for an upstream node.
+ *
+ * Returns 0 if putNodeInArray() refused the node, 1 if it went to dheap and
+ * -1 if it went to uheap.
+ */
+static boolean dispatch1node(int dis, unsigned int tempNode, int maxNodes,
+        FibHeap *dheap, FibHeap *uheap, int *DmaxDis, int *UmaxDis)
+{
+    int reach;
+
+    nodeCounter++;
+
+    if(!putNodeInArray(tempNode, maxNodes, dis))
+        return 0;
+
+    if(dis >= 0)
+    {
+        insertNodeIntoHeap(dheap, dis, nodeCounter);
+
+        if(dis > *DmaxDis)
+            *DmaxDis = dis;
+
+        return 1;
+    }
+
+    insertNodeIntoHeap(uheap, -dis, nodeCounter);
+    reach = contig_array[tempNode].length;
+
+    if(-dis + reach > *UmaxDis)
+        *UmaxDis = -dis + contig_array[tempNode].length;
+
+    return -1;
+}
+
+/*
+ * Tell whether the downstream heap may pause at a node with distance dis:
+ * returns 1 when dis is not below the largest downstream distance seen so
+ * far (DmaxDis), 0 otherwise.  currNode is not used.
+ */
+static boolean canDheapWait(unsigned int currNode, int dis, int DmaxDis)
+{
+    return (dis < DmaxDis) ? 0 : 1;
+}
+
+/*
+ * Process the downstream heap until it runs dry or may wait.
+ *
+ * Does nothing (returns 1) while *Dwait is set.  Otherwise nodes are taken
+ * off dheap one by one; for each node its upstream neighbours and then its
+ * downstream neighbours that are not yet in the sub-graph are handed to
+ * dispatch1node().  A neighbour that lands on uheap clears *Uwait.
+ *
+ * If the node was the last one on the heap, more than one node has been
+ * collected and canDheapWait() agrees, the node is put back with its
+ * upstream side shut and the heap is left waiting.
+ *
+ * Returns 0 as soon as dispatch1node() fails, 1 otherwise; *Dwait is 1
+ * whenever 1 is returned.
+ */
+static boolean workOnDheap(FibHeap *dheap, FibHeap *uheap, boolean *Dwait, boolean *Uwait,
+        int *DmaxDis, int *UmaxDis, int maxNodes)
+{
+    unsigned int node, nodeTwin, neighbour;
+    CTGinHEAP *entry;
+    CONNECT *link;
+    int slot;
+    int nodeDis, neighbourDis;
+    boolean ret, wasLast;
+
+    if(*Dwait)
+        return 1;
+
+    while((slot = removeNextNodeFromHeap(dheap)) != 0)
+    {
+        entry = &ctg4heapArray[slot];
+        node = entry->ctgID;
+        nodeDis = entry->dis;
+        wasLast = IsHeapEmpty(dheap);
+        nodeTwin = getTwinCtg(node);
+
+        //upstream neighbours: connections leaving the twin of the node
+        link = entry->us_shut4dheap ? NULL : contig_array[nodeTwin].downwardConnect;
+
+        for(; link; link = link->next)
+        {
+            if(link->deleted || link->mask)
+                continue;
+
+            neighbour = getTwinCtg(link->contigID);
+
+            if(contig_array[neighbour].inSubGraph)
+                continue;
+
+            neighbourDis = nodeDis - link->gapLen - (int)contig_array[nodeTwin].length;
+            ret = dispatch1node(neighbourDis, neighbour, maxNodes, dheap, uheap, DmaxDis, UmaxDis);
+
+            if(ret == 0)
+                return 0;
+
+            if(ret < 0)
+                *Uwait = 0;
+        }
+
+        if(nodeCounter > 1 && wasLast)
+        {
+            *Dwait = canDheapWait(node, nodeDis, *DmaxDis);
+
+            if(*Dwait)
+            {
+                //put the node back, but do not look upstream from it again
+                wasLast = IsHeapEmpty(dheap);
+                insertNodeIntoHeap(dheap, nodeDis, slot);
+                ctg4heapArray[slot].us_shut4dheap = 1;
+
+                if(wasLast)
+                    return 1;
+
+                continue;
+            }
+        }
+
+        //downstream neighbours
+        link = entry->ds_shut4dheap ? NULL : contig_array[node].downwardConnect;
+
+        while(link)
+        {
+            neighbour = link->contigID;
+
+            if(link->deleted || link->mask || contig_array[neighbour].inSubGraph)
+            {
+                link = link->next;
+                continue;
+            }
+
+            neighbourDis = nodeDis + link->gapLen + (int)contig_array[neighbour].length;
+            ret = dispatch1node(neighbourDis, neighbour, maxNodes, dheap, uheap, DmaxDis, UmaxDis);
+
+            if(ret == 0)
+                return 0;
+
+            if(ret < 0)
+                *Uwait = 0;
+
+            //no explicit advance: a dispatched neighbour is now marked
+            //inSubGraph, so the next pass skips this connection
+        }
+    }
+
+    *Dwait = 1;
+    return 1;
+}
+
+/*
+ * Tell whether the upstream heap may pause at currNode with distance dis:
+ * returns 1 when -dis plus the length of currNode is not below the largest
+ * upstream reach seen so far (UmaxDis), 0 otherwise.
+ */
+static boolean canUheapWait(unsigned int currNode, int dis, int UmaxDis)
+{
+    int reach = contig_array[currNode].length;
+
+    return (-dis + reach < UmaxDis) ? 0 : 1;
+}
+
+/*
+ * Process the upstream heap until it runs dry or may wait.
+ *
+ * Does nothing (returns 1) while *Uwait is set.  Otherwise nodes are taken
+ * off uheap one by one; for each node its downstream neighbours and then
+ * its upstream neighbours that are not yet in the sub-graph are handed to
+ * dispatch1node().  A neighbour that lands on dheap clears *Dwait so that
+ * the downstream heap gets processed again.
+ *
+ * If the node was the last one on the heap, more than one node has been
+ * collected and canUheapWait() agrees, the node is put back with its
+ * downstream side shut and the heap is left waiting.
+ *
+ * Returns 0 as soon as dispatch1node() fails, 1 otherwise; *Uwait is 1
+ * whenever 1 is returned.
+ *
+ * Fix: the upstream loop used to set *Dwait = 1 after queuing a node on
+ * dheap, which left that node unprocessed; it now clears the flag like the
+ * downstream loop here and both loops of workOnDheap().
+ */
+static boolean workOnUheap(FibHeap *dheap, FibHeap *uheap, boolean *Dwait, boolean *Uwait,
+        int *DmaxDis, int *UmaxDis, int maxNodes)
+{
+    unsigned int currNode, twin, tempNode;
+    CTGinHEAP *ctgInHeap;
+    int indexInArray;
+    CONNECT *us_cnt, *ds_cnt;
+    int dis0, dis;
+    boolean ret, isEmpty;
+
+    if(*Uwait)
+        return 1;
+
+    while((indexInArray = removeNextNodeFromHeap(uheap)) != 0)
+    {
+        ctgInHeap = &ctg4heapArray[indexInArray];
+        currNode = ctgInHeap->ctgID;
+        dis0 = ctgInHeap->dis;
+
+        isEmpty = IsHeapEmpty(uheap);
+
+        //downstream neighbours
+        ds_cnt = ctgInHeap->ds_shut4uheap ? NULL : contig_array[currNode].downwardConnect;
+
+        while(ds_cnt)
+        {
+            if(ds_cnt->deleted || ds_cnt->mask || contig_array[ds_cnt->contigID].inSubGraph)
+            {
+                ds_cnt = ds_cnt->next;
+                continue;
+            }
+
+            tempNode = ds_cnt->contigID;
+            dis = dis0 + ds_cnt->gapLen + contig_array[tempNode].length;
+            ret = dispatch1node(dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis);
+
+            if(ret == 0)
+                return 0;
+            else if(ret > 0)
+                *Dwait = 0;
+
+            //no explicit advance: a dispatched neighbour is now marked
+            //inSubGraph, so the next pass skips this connection
+        }
+
+        if(nodeCounter > 1 && isEmpty)
+        {
+            *Uwait = canUheapWait(currNode, dis0, *UmaxDis);
+
+            if(*Uwait)
+            {
+                //put the node back, but do not look downstream from it again
+                isEmpty = IsHeapEmpty(uheap);
+                //NOTE(review): dispatch1node() queues upstream nodes with key -dis,
+                //while this re-insert uses dis0 unnegated -- confirm this is intended
+                insertNodeIntoHeap(uheap, dis0, indexInArray);
+                ctg4heapArray[indexInArray].ds_shut4uheap = 1;
+
+                if(isEmpty)
+                    return 1;
+                else
+                    continue;
+            }
+        }
+
+        //upstream neighbours: connections leaving the twin of the node
+        twin = getTwinCtg(currNode);
+        us_cnt = ctgInHeap->us_shut4uheap ? NULL : contig_array[twin].downwardConnect;
+
+        while(us_cnt)
+        {
+            tempNode = getTwinCtg(us_cnt->contigID);
+
+            if(us_cnt->deleted || us_cnt->mask || contig_array[tempNode].inSubGraph)
+            {
+                us_cnt = us_cnt->next;
+                continue;
+            }
+
+            dis = dis0 - us_cnt->gapLen - contig_array[twin].length;
+
+            ret = dispatch1node(dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis);
+
+            if(ret == 0)
+                return 0;
+            else if(ret > 0)
+                *Dwait = 0;     //wake the downstream heap for the new node
+
+            us_cnt = us_cnt->next;
+        }
+    } // for each node that comes off the heap
+
+    *Uwait = 1;
+    return 1;
+}
+
+/*
+ * Collect a sub-graph around node1 into ctg4heapArray[1..nodeCounter].
+ *
+ * node1 is stored once (slot 1, distance 0) and queued on both heaps: on
+ * the downstream heap with its upstream side shut and on the upstream heap
+ * with its downstream side shut.  The downstream heap starts active, the
+ * upstream heap starts waiting.  workOnDheap() and workOnUheap() are then
+ * alternated until both heaps wait.
+ *
+ * Returns 1 when both heaps came to rest, leaving the collected nodes
+ * marked inSubGraph.  Returns 0 when either worker failed, after clearing
+ * the marks with setInGraph(0).  Both heaps are destroyed in either case.
+ */
+static boolean pickUpGeneralSubgraph(unsigned int node1, int maxNodes)
+{
+    FibHeap *Uheap = newFibHeap(); // heap for contigs upstream of node1
+    FibHeap *Dheap = newFibHeap(); // heap for contigs downstream of node1
+    int UmaxDis = contig_array[node1].length;   // max distance upstream of node1
+    int DmaxDis = 0;
+    boolean Uwait = 1;      // wait signal for Uheap
+    boolean Dwait = 0;
+    boolean picked;
+
+    nodeCounter = 1;
+    putNodeInArray(node1, maxNodes, 0);
+
+    insertNodeIntoHeap(Dheap, 0, nodeCounter);
+    ctg4heapArray[nodeCounter].us_shut4dheap = 1;
+
+    insertNodeIntoHeap(Uheap, 0, nodeCounter);
+    ctg4heapArray[nodeCounter].ds_shut4uheap = 1;
+
+    for(;;)
+    {
+        if(!workOnDheap(Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes) ||
+                !workOnUheap(Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes))
+        {
+            setInGraph(0);
+            picked = 0;
+            break;
+        }
+
+        if(Uwait && Dwait)
+        {
+            picked = 1;
+            break;
+        }
+    }
+
+    destroyHeap(Dheap);
+    destroyHeap(Uheap);
+    return picked;
+}
+
+/*
+ * qsort() comparator for CTGinHEAP entries: orders them by ascending dis.
+ * Returns 1, 0 or -1.
+ */
+static int cmp_ctg(const void *a, const void *b)
+{
+    const CTGinHEAP *lhs = (const CTGinHEAP *)a;
+    const CTGinHEAP *rhs = (const CTGinHEAP *)b;
+
+    if(lhs->dis > rhs->dis)
+        return 1;
+
+    return (lhs->dis == rhs->dis) ? 0 : -1;
+}
+
+/*
+ * Check whether the sub-graph in ctg4heapArray[1..nodeCounter] can be
+ * linearised.  Deleted and masked connections are ignored throughout.
+ *
+ * Returns 0 if any of these holds, 1 otherwise:
+ *   - a connection leaving the twin of the first node reaches a contig
+ *     that is in the sub-graph, or more than one such connection has
+ *     prevInScaf set;
+ *   - a connection leaving the last node reaches a contig whose twin is in
+ *     the sub-graph, or more than one such connection has prevInScaf set;
+ *   - a node other than the last has a connection to a contig outside the
+ *     sub-graph;
+ *   - the twin of a node other than the first has a connection to a contig
+ *     whose twin is outside the sub-graph.
+ */
+static boolean checkEligible()
+{
+    unsigned int head = ctg4heapArray[1].ctgID;
+    unsigned int tail;
+    unsigned int other;
+    CONNECT *link;
+    boolean boundSeen;
+    int k;
+
+    //incoming side of the first node
+    boundSeen = 0;
+
+    for(link = contig_array[getTwinCtg(head)].downwardConnect; link; link = link->next)
+    {
+        if(link->deleted || link->mask)
+            continue;
+
+        if(contig_array[link->contigID].inSubGraph)
+            return 0;
+
+        if(link->prevInScaf)
+        {
+            if(boundSeen)
+                return 0;
+
+            boundSeen = 1;
+        }
+    }
+
+    //outgoing side of the last node
+    tail = ctg4heapArray[nodeCounter].ctgID;
+    boundSeen = 0;
+
+    for(link = contig_array[tail].downwardConnect; link; link = link->next)
+    {
+        if(link->deleted || link->mask)
+            continue;
+
+        other = getTwinCtg(link->contigID);
+
+        if(contig_array[other].inSubGraph)
+            return 0;
+
+        if(link->prevInScaf)
+        {
+            if(boundSeen)
+                return 0;
+
+            boundSeen = 1;
+        }
+    }
+
+    //no node but the last may link to a contig outside the sub-graph
+    for(k = 1; k < nodeCounter; k++)
+    {
+        for(link = contig_array[ctg4heapArray[k].ctgID].downwardConnect; link; link = link->next)
+        {
+            if(link->deleted || link->mask)
+                continue;
+
+            if(!contig_array[link->contigID].inSubGraph)
+                return 0;
+        }
+    }
+
+    //no node but the first may be linked from a contig outside the sub-graph
+    for(k = 2; k <= nodeCounter; k++)
+    {
+        other = getTwinCtg(ctg4heapArray[k].ctgID);
+
+        for(link = contig_array[other].downwardConnect; link; link = link->next)
+        {
+            if(link->deleted || link->mask)
+                continue;
+
+            if(!contig_array[getTwinCtg(link->contigID)].inSubGraph)
+                return 0;
+        }
+    }
+
+    return 1;
+}
+
+/* Put the nodes of the current sub-graph in a line.
+ *
+ * Works on ctg4heapArray[1..nodeCounter], taken in array order
+ * (general_linearization() sorts the array by dis before calling), in
+ * three steps:
+ *   1. mark as deleted every connection, not already masked or deleted,
+ *      that leads from a sub-graph node to another sub-graph node, and
+ *      likewise from the twin of a node to the twin of a sub-graph node;
+ *      the scaffold binding of each such connection is cleared;
+ *   2. connect each pair of neighbouring entries i -> i+1 and, on the
+ *      other strand, twin(i+1) -> twin(i), reviving an existing connection
+ *      or allocating a new one, and chain consecutive connections with
+ *      setNextInScaf()/setPrevInScaf();
+ *   3. look for a bound connection (prevInScaf set) entering the first
+ *      node and one leaving the last node, and hook them onto the new
+ *      chain.
+ */
+static void arrangeNodes_general()
+{
+ int i, gap;
+ CONNECT *ite_cnt, *temp_cnt, *bal_cnt, *prev_cnt, *next_cnt;
+ unsigned int node1, node2;
+ unsigned int bal_nd1, bal_nd2;
+
+ /* step 1: delete the original connections inside the sub-graph */
+ for(i = 1; i <= nodeCounter; i++)
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ ite_cnt = contig_array[node1].downwardConnect;
+
+ while(ite_cnt)
+ {
+ if(ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph)
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ ite_cnt->deleted = 1;
+ setNextInScaf(ite_cnt, NULL);
+ setPrevInScaf(ite_cnt, 0);
+ ite_cnt = ite_cnt->next;
+ }
+
+ bal_nd1 = getTwinCtg(node1); /* same clean-up on the other strand */
+ ite_cnt = contig_array[bal_nd1].downwardConnect;
+
+ while(ite_cnt)
+ {
+ if(ite_cnt->mask || ite_cnt->deleted || !contig_array[getTwinCtg(ite_cnt->contigID)].inSubGraph)
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ ite_cnt->deleted = 1;
+ setNextInScaf(ite_cnt, NULL);
+ setPrevInScaf(ite_cnt, 0);
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ /* step 2: create the new connections between neighbouring entries;
+ * prev_cnt/next_cnt carry the connections made for the previous pair on
+ * the forward and on the twin strand */
+ prev_cnt = next_cnt = NULL;
+
+ for(i = 1; i < nodeCounter; i++)
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ node2 = ctg4heapArray[i + 1].ctgID;
+ bal_nd1 = getTwinCtg(node1);
+ bal_nd2 = getTwinCtg(node2);
+ gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis
+ - contig_array[node2].length; /* gap implied by the two distances; used for new connections only */
+ temp_cnt = getCntBetween(node1, node2);
+
+ if(temp_cnt)
+ {
+ temp_cnt->deleted = 0; /* revive the existing connection, keeping its own gapLen */
+ temp_cnt->mask = 0;
+ //temp_cnt->gapLen = gap;
+ bal_cnt = getCntBetween(bal_nd2, bal_nd1);
+ bal_cnt->deleted = 0; /* NOTE(review): bal_cnt is used without a NULL check -- assumes the twin connection always exists */
+ bal_cnt->mask = 0;
+ //bal_cnt->gapLen = gap;
+ }
+ else
+ {
+ temp_cnt = allocateCN(node2, gap);
+
+ if(cntLookupTable)
+ putCnt2LookupTable(node1, temp_cnt);
+
+ temp_cnt->next = contig_array[node1].downwardConnect; /* push onto the front of node1's list */
+ contig_array[node1].downwardConnect = temp_cnt;
+ bal_cnt = allocateCN(bal_nd1, gap);
+
+ if(cntLookupTable)
+ putCnt2LookupTable(bal_nd2, bal_cnt);
+
+ bal_cnt->next = contig_array[bal_nd2].downwardConnect;
+ contig_array[bal_nd2].downwardConnect = bal_cnt;
+ }
+
+ if(prev_cnt)
+ {
+ setNextInScaf(prev_cnt, temp_cnt);
+ setPrevInScaf(temp_cnt, 1);
+ }
+
+ if(next_cnt)
+ {
+ setNextInScaf(bal_cnt, next_cnt); /* the twin strand is chained in the opposite direction */
+ setPrevInScaf(next_cnt, 1);
+ }
+
+ prev_cnt = temp_cnt;
+ next_cnt = bal_cnt;
+ }
+
+ /* step 3: re-bind the connections at both ends, starting with the
+ * first bound connection that leaves the twin of the first node */
+ bal_nd2 = getTwinCtg(ctg4heapArray[1].ctgID);
+ ite_cnt = contig_array[bal_nd2].downwardConnect;
+
+ while(ite_cnt)
+ {
+ if(ite_cnt->deleted || ite_cnt->mask)
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if(ite_cnt->prevInScaf)
+ break;
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ if(ite_cnt)
+ {
+ bal_nd1 = ite_cnt->contigID;
+ node1 = getTwinCtg(bal_nd1);
+ node2 = ctg4heapArray[1].ctgID;
+ temp_cnt = checkConnect(node1, node2);
+ bal_cnt = ite_cnt;
+ next_cnt = checkConnect(ctg4heapArray[1].ctgID, ctg4heapArray[2].ctgID); /* NOTE(review): reads entry 2 -- assumes nodeCounter >= 2 */
+ prev_cnt = checkConnect(getTwinCtg(ctg4heapArray[2].ctgID), getTwinCtg(ctg4heapArray[1].ctgID));
+
+ if(temp_cnt)
+ {
+ setNextInScaf(temp_cnt, next_cnt);
+ setPrevInScaf(temp_cnt->nextInScaf, 0); /* NOTE(review): runs after nextInScaf was re-pointed; presumably meant for the old successor -- confirm */
+ setPrevInScaf(next_cnt, 1);
+ setNextInScaf(prev_cnt, bal_cnt);
+ }
+ }
+
+ node1 = ctg4heapArray[nodeCounter].ctgID; /* now the first bound connection that leaves the last node */
+ ite_cnt = contig_array[node1].downwardConnect;
+
+ while(ite_cnt)
+ {
+ if(ite_cnt->deleted || ite_cnt->mask)
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if(ite_cnt->prevInScaf)
+ break;
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ if(ite_cnt)
+ {
+ node2 = ite_cnt->contigID;
+ bal_nd1 = getTwinCtg(node1);
+ bal_nd2 = getTwinCtg(node2);
+ temp_cnt = ite_cnt;
+ bal_cnt = checkConnect(bal_nd2, bal_nd1);
+ next_cnt = checkConnect(getTwinCtg(ctg4heapArray[nodeCounter].ctgID),
+ getTwinCtg(ctg4heapArray[nodeCounter - 1].ctgID));
+ prev_cnt = checkConnect(ctg4heapArray[nodeCounter - 1].ctgID, ctg4heapArray[nodeCounter].ctgID);
+ setNextInScaf(prev_cnt, temp_cnt); /* NOTE(review): prev_cnt, bal_cnt and next_cnt come from checkConnect() unchecked -- confirm the setters accept NULL */
+ setNextInScaf(bal_cnt, next_cnt);
+ setPrevInScaf(next_cnt, 1);
+ }
+}
+/*
+ * Check whether neighbouring contigs of the sub-graph overlap by a
+ * reasonable amount.
+ *
+ * The gap between entries i and i + 1 of ctg4heapArray is the difference
+ * of their distances minus the length of entry i + 1; negative gaps are
+ * overlaps.  The summed overlap is divided by the summed contig length of
+ * the sub-graph and the ratio is reported on stderr.
+ *
+ * Returns 0 if the ratio exceeds tolerance, 1 otherwise (also when the
+ * summed length is below 1, in which case nothing is reported).
+ */
+boolean checkOverlapInBetween_general(double tolerance)
+{
+    int k, gap;
+    int totalLen = 0;
+    int overlapLen = 0;
+    double olp_pect;
+
+    for(k = 1; k <= nodeCounter; k++)
+        totalLen += contig_array[ctg4heapArray[k].ctgID].length;
+
+    if(totalLen < 1)
+        return 1;
+
+    for(k = 1; k < nodeCounter; k++)
+    {
+        gap = ctg4heapArray[k + 1].dis - ctg4heapArray[k].dis
+              - contig_array[ctg4heapArray[k + 1].ctgID].length;
+
+        if(gap < 0)
+            overlapLen -= gap;
+    }
+
+    olp_pect = (double)overlapLen / totalLen;
+    fprintf(stderr, "[%s]existing with olp_pect %.3f.\n", __FUNCTION__, olp_pect);
+
+    return (olp_pect > tolerance) ? 0 : 1;
+}
+
+/*
+ * Check whether connections contradict the order of the nodes in the
+ * sub-graph.
+ *
+ * For every pair of entries i < j of ctg4heapArray, the weight of a
+ * connection i -> j counts as support and the weight of a connection
+ * j -> i counts as objection.
+ *
+ * Returns 1 (conflict) when there is no support at all or when
+ * objection / support is not below tolerance, 0 otherwise.
+ */
+static boolean checkConflictCnt_general(double tolerance)
+{
+    int lo, hi;
+    int support = 0;
+    int objection = 0;
+    CONNECT *link;
+
+    for(lo = 1; lo < nodeCounter; lo++)
+    {
+        for(hi = lo + 1; hi <= nodeCounter; hi++)
+        {
+            link = checkConnect(ctg4heapArray[lo].ctgID, ctg4heapArray[hi].ctgID);
+
+            if(link)
+                support += link->weight;
+
+            link = checkConnect(ctg4heapArray[hi].ctgID, ctg4heapArray[lo].ctgID);
+
+            if(link)
+                objection += link->weight;
+        }
+    }
+
+    if(support < 1)
+        return 1;
+
+    return ((double)objection / support < tolerance) ? 0 : 1;
+}
+/*
+ * Turn sub-graphs into linear structures.
+ *
+ * Every unmasked contig with at least two valid connections, visited from
+ * num_ctg down to 1, is tried as the start of a sub-graph.  A picked
+ * sub-graph is sorted by distance and linearised with
+ * arrangeNodes_general() only if it passes checkEligible(), the overlap
+ * check and the conflict check, in that order.  The inSubGraph marks are
+ * cleared after each picked sub-graph.
+ *
+ * strict - use OverlapPercent and ConflPercent as tolerances; otherwise
+ *          twice these values are used.
+ *
+ * A summary line with the number of picked and rejected sub-graphs is
+ * printed to stdout.
+ */
+static void general_linearization(boolean strict)
+{
+    unsigned int seed;
+    int out_num;
+    int subCounter = 0;
+    int conflCounter = 0, overlapCounter = 0, eligibleCounter = 0;
+    double overlapTolerance = strict ? OverlapPercent : 2 * OverlapPercent;
+    double conflTolerance = strict ? ConflPercent : 2 * ConflPercent;
+
+    for(seed = num_ctg; seed > 0; seed--)
+    {
+        if(contig_array[seed].mask)
+            continue;
+
+        out_num = validConnect(seed, NULL);
+
+        if(out_num < 2)
+            continue;
+
+        if(!pickUpGeneralSubgraph(seed, MaxNodeInSub))
+            continue;
+
+        subCounter++;
+        qsort(&ctg4heapArray[1], nodeCounter, sizeof(CTGinHEAP), cmp_ctg);
+
+        //the first check that fails decides which counter is bumped
+        if(!checkEligible())
+            eligibleCounter++;
+        else if(!checkOverlapInBetween_general(overlapTolerance))
+            overlapCounter++;
+        else if(checkConflictCnt_general(conflTolerance))
+            conflCounter++;
+        else
+            arrangeNodes_general();
+
+        setInGraph(0);
+    }
+
+    fprintf(stdout, "[%s]Picked %d subgraphs,%d have conflicting connections,%d have significant overlapping, %d eligible\n",
+            __FUNCTION__, subCounter, conflCounter, overlapCounter, eligibleCounter);
+}
+
+/**** the following code detects weak points in scaffolds and breaks the scaffolds there **********/
+// mark connections in scaffolds made by small pe
+/*
+ * Follow a chain of scaffold connections (nextInScaf) starting at `link`.
+ * Every connection on the chain, and the connection found between the twin
+ * of its target and the previously visited contig, gets bySmall = 1; the
+ * target contig and its twin get flag = 1.
+ */
+static void smallScaf_tagChain(CONNECT *link, unsigned int prevCtg)
+{
+    for(; link; link = link->nextInScaf)
+    {
+        unsigned int target = link->contigID;
+        unsigned int twin = getTwinCtg(target);
+        CONNECT *mirror;
+
+        link->bySmall = 1;
+        mirror = getCntBetween(twin, prevCtg);
+
+        if(mirror)
+            mirror->bySmall = 1;
+
+        contig_array[target].flag = 1;
+        contig_array[twin].flag = 1;
+        prevCtg = twin;
+    }
+}
+
+/*
+ * Clear every contig's flag, then for each unflagged, unmasked contig that
+ * has a bind connection, tag the chain that leaves the contig and the chain
+ * that leaves its twin.
+ */
+static void smallScaf()
+{
+    unsigned int i;
+
+    for(i = 1; i <= num_ctg; i++)
+        contig_array[i].flag = 0;
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        CONNECT *head;
+        unsigned int twin;
+
+        if(contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect)
+            continue;
+
+        head = getBindCnt(i);
+
+        if(!head)
+            continue;
+
+        twin = getTwinCtg(i);
+        contig_array[i].flag = 1;
+        contig_array[twin].flag = 1;
+
+        smallScaf_tagChain(head, twin);
+        smallScaf_tagChain(getBindCnt(twin), i);
+    }
+
+    //printf("Report from smallScaf: %d scaffolds by smallPE\n",counter);
+}
+
+/*
+ * Accumulate weight `wt` for scaffold id `scaf` in the parallel arrays
+ * SCAF (ids) and WT (weights), of which the first `counter` slots are in use.
+ *
+ * Returns 0 when `scaf` was already present (its weight is increased),
+ * 1 when a new entry was written at index `counter`.
+ */
+static boolean putItem2Sarray(unsigned int scaf, int wt, DARRAY *SCAF, DARRAY *WT, int counter)
+{
+    int slot = 0;
+    unsigned int *idCell, *wtCell;
+
+    while(slot < counter)
+    {
+        idCell = (unsigned int *)darrayGet(SCAF, slot);
+
+        if(*idCell == scaf)
+        {
+            wtCell = (unsigned int *)darrayGet(WT, slot);
+            *wtCell += wt;
+            return 0;
+        }
+
+        slot++;
+    }
+
+    /* Not seen before: append at the first free slot. */
+    idCell = (unsigned int *)darrayPut(SCAF, counter);
+    wtCell = (unsigned int *)darrayPut(WT, counter);
+    *idCell = scaf;
+    *wtCell = wt;
+    return 1;
+}
+
+/*
+ * For every contig on scafStack, look at its downward connections that are
+ * not deleted, masked, inherited or part of a scaffold, and that point to a
+ * contig with a different from_vt.  The weight of each such connection is
+ * accumulated per target from_vt in SCAF/WT.
+ *
+ * Returns the number of distinct entries stored in SCAF/WT.
+ */
+static int getDSLink2Scaf(STACK *scafStack, DARRAY *SCAF, DARRAY *WT)
+{
+    unsigned int *item;
+    int distinct = 0;
+
+    stackRecover(scafStack);
+
+    while((item = (unsigned int *)stackPop(scafStack)) != NULL)
+    {
+        unsigned int ctg = *item;
+        CONNECT *link;
+
+        if(contig_array[ctg].mask)
+            continue;
+
+        for(link = contig_array[ctg].downwardConnect; link; link = link->next)
+        {
+            unsigned int target;
+
+            if(link->deleted || link->mask || link->singleInScaf
+                    || link->nextInScaf || link->prevInScaf || link->inherit)
+                continue;
+
+            target = link->contigID;
+
+            /* same from_vt: both contigs sit on the same scaffold */
+            if(contig_array[ctg].from_vt == contig_array[target].from_vt)
+                continue;
+
+            if(putItem2Sarray(contig_array[target].from_vt, link->weight, SCAF, WT, distinct))
+                distinct++;
+        }
+    }
+
+    return distinct;
+
+}
+
+/*
+ * Empty scafStack, push `start` and then every contig reached by following
+ * the bind connection chain (nextInScaf) from it, and back the stack up.
+ *
+ * Returns the summed length of all contigs pushed.
+ */
+static int getScaffold(unsigned int start, STACK *scafStack)
+{
+    int total = contig_array[start].length;
+    unsigned int *cell;
+    CONNECT *link;
+
+    emptyStack(scafStack);
+    cell = (unsigned int *)stackPush(scafStack);
+    *cell = start;
+
+    for(link = getBindCnt(start); link; link = link->nextInScaf)
+    {
+        unsigned int member = link->contigID;
+
+        cell = (unsigned int *)stackPush(scafStack);
+        *cell = member;
+        total += contig_array[member].length;
+    }
+
+    stackBackup(scafStack);
+    return total;
+}
+
+/* Returns 1 if any of the first `count` weights in WT reaches weakPE, else 0. */
+static boolean isLinkReliable(DARRAY *WT, int count)
+{
+    int slot = 0;
+
+    while(slot < count)
+    {
+        int weight = *(int *)darrayGet(WT, slot);
+
+        if(weight >= weakPE)
+            return 1;
+
+        slot++;
+    }
+
+    return 0;
+}
+
+/*
+ * Look `scaf` up among the first `count` ids in SCAF and return the weight
+ * stored at the same index in WT; returns 0 when the id is not present.
+ */
+static int getWtFromSarray(DARRAY *SCAF, DARRAY *WT, int count, unsigned int scaf)
+{
+    int slot = 0;
+
+    while(slot < count)
+    {
+        unsigned int id = *(unsigned int *)darrayGet(SCAF, slot);
+
+        if(id == scaf)
+            return *(int *)darrayGet(WT, slot);
+
+        slot++;
+    }
+
+    return 0;
+}
+
+/* Replace, in place, every contig id stored on scafStack by its twin's id. */
+static void switch2twin(STACK *scafStack)
+{
+    stackRecover(scafStack);
+
+    for(;;)
+    {
+        unsigned int *cell = (unsigned int *)stackPop(scafStack);
+
+        if(cell == NULL)
+            break;
+
+        *cell = getTwinCtg(*cell);
+    }
+}
+/*
+ ------>
+ scaf1 --- --- -- -- ---
+ scaf2 -- --- --- --
+ ---->
+*/
+/*
+ * Compare the outgoing links of two scaffolds (see the sketch above).
+ *
+ * Returns 1 (consistent) when either scaffold has no outgoing link, when
+ * either has no link whose weight reaches weakPE, or when every target
+ * that scaffold 1 links to with weight >= weakPE is also linked from
+ * scaffold 2 with a weight of at least 1.  Returns 0 otherwise.
+ */
+static boolean checkScafConsist(STACK *scafStack1, STACK *scafStack2)
+{
+    /* targets each scaffold links to, and the accumulated link weights */
+    DARRAY *targets1 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    DARRAY *targets2 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    DARRAY *weights1 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    DARRAY *weights2 = (DARRAY *)createDarray(1000, sizeof(unsigned int));
+    boolean consistent = 1;
+
+    int links1 = getDSLink2Scaf(scafStack1, targets1, weights1);
+    int links2 = getDSLink2Scaf(scafStack2, targets2, weights2);
+
+    if(links1 && links2
+            && isLinkReliable(weights1, links1) && isLinkReliable(weights2, links2))
+    {
+        int slot;
+
+        for(slot = 0; slot < links1 && consistent; slot++)
+        {
+            unsigned int target;
+
+            if(*(int *)darrayGet(weights1, slot) < weakPE)
+                continue;
+
+            target = *(unsigned int *)darrayGet(targets1, slot);
+
+            if(getWtFromSarray(targets2, weights2, links2, target) < 1)
+            {
+                //fprintf(stderr,"Inconsistant link to %d\n",scaf);
+                consistent = 0;
+            }
+        }
+    }
+
+    freeDarray(targets1);
+    freeDarray(targets2);
+    freeDarray(weights1);
+    freeDarray(weights2);
+    return consistent;
+}
+
+/*
+ * Widen the break around a weak connection.
+ *
+ * ctgArray holds `count` contig ids in scaffold order; the weak connection
+ * lies between positions `weakest` and `weakest + 1`.  Starting there, the
+ * range is extended to the left and to the right for as long as the
+ * neighbouring connection has maxGap <= 2.
+ *
+ * start/finish: receive the first and last array index of the range.
+ *
+ * getCntBetween() can return NULL (other callers in this file test for
+ * it), so a missing connection now stops the extension instead of being
+ * dereferenced.
+ */
+static void setBreakPoints(DARRAY *ctgArray, int count, int weakest,
+                           int *start, int *finish)
+{
+    int index;
+    unsigned int thisCtg, nextCtg, prevCtg;
+    CONNECT *cnt;
+
+    /* extend towards the beginning of the array */
+    *start = weakest;
+    nextCtg = *(unsigned int *)darrayGet(ctgArray, weakest);
+
+    for(index = weakest - 1; index >= 0; index--)
+    {
+        thisCtg = *(unsigned int *)darrayGet(ctgArray, index);
+        cnt = getCntBetween(thisCtg, nextCtg);
+
+        if(!cnt || cnt->maxGap > 2)
+            break;
+
+        *start = index;
+        nextCtg = thisCtg;
+    }
+
+    /* extend towards the end of the array */
+    *finish = weakest + 1;
+    prevCtg = *(unsigned int *)darrayGet(ctgArray, weakest + 1);
+
+    for(index = weakest + 2; index < count; index++)
+    {
+        thisCtg = *(unsigned int *)darrayGet(ctgArray, index);
+        cnt = getCntBetween(prevCtg, thisCtg);
+
+        if(!cnt || cnt->maxGap > 2)
+            break;
+
+        *finish = index;
+        prevCtg = thisCtg;
+    }
+
+}
+
+/*
+ * For every contig on scafStack set to_vt to `end`, and set from_vt of the
+ * contig's twin to the twin of `end`.
+ */
+static void changeScafEnd(STACK *scafStack, unsigned int end)
+{
+    const unsigned int twinOfEnd = getTwinCtg(end);
+    unsigned int *cell;
+
+    stackRecover(scafStack);
+
+    for(cell = (unsigned int *)stackPop(scafStack); cell != NULL;
+            cell = (unsigned int *)stackPop(scafStack))
+    {
+        unsigned int member = *cell;
+
+        contig_array[member].to_vt = end;
+        contig_array[getTwinCtg(member)].from_vt = twinOfEnd;
+    }
+}
+
+/*
+ * For every contig on scafStack set from_vt to `start`, and set to_vt of the
+ * contig's twin to the twin of `start`.
+ */
+static void changeScafBegin(STACK *scafStack, unsigned int start)
+{
+    const unsigned int twinOfStart = getTwinCtg(start);
+    unsigned int *cell;
+
+    stackRecover(scafStack);
+
+    for(cell = (unsigned int *)stackPop(scafStack); cell != NULL;
+            cell = (unsigned int *)stackPop(scafStack))
+    {
+        unsigned int member = *cell;
+
+        contig_array[member].from_vt = start;
+        contig_array[getTwinCtg(member)].to_vt = twinOfStart;
+    }
+}
+// break down scaffolds poorly supported by longer PE
+//
+// For every unvisited, unmasked contig that has a bind connection:
+//   1. collect the run of contigs joined by connections marked bySmall,
+//      in both directions, summing the connections' maxGap values;
+//   2. skip runs with no link, shorter than 5000, or with an average
+//      maxGap below 10;
+//   3. find the connection with the smallest maxGap, ignoring the first and
+//      the last 2000 bases of the run;
+//   4. if that connection is weak (maxGap <= 2, or <= avgPE / 5), determine
+//      the range to cut with setBreakPoints() and, when the two remaining
+//      sides link inconsistently to other scaffolds, unbind and mask the
+//      connections inside the range.
+// Relies on the file-level arrays scaf5, scaf3 and tempArray.
+// NOTE(review): results of getCntBetween() are dereferenced below without a
+// NULL check, although smallScaf() in this file tests the same call for NULL.
+static void detectBreakScaf()
+{
+ fprintf(stderr, "[%s]entering this function.\n", __FUNCTION__);
+ unsigned int i, avgPE, scafLen, len, ctg, bal_ctg, prevCtg, thisCtg;
+ long long peCounter, linkCounter;
+ int num3, num5, weakPoint, tempCounter, j, t, counter = 0;
+ CONNECT *bindCnt, *cnt, *weakCnt;
+
+ // work stacks holding the contigs on either side of a candidate break
+ STACK *scafStack1 = (STACK *)createStack(1000, sizeof(unsigned int));
+ STACK *scafStack2 = (STACK *)createStack(1000, sizeof(unsigned int));
+
+ // flag marks contigs already assigned to a run in this pass
+ for(i = 1; i <= num_ctg; i++)
+ contig_array[i].flag = 0;
+
+ for(i = 1; i <= num_ctg; i++)
+ {
+ if(contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect)
+ continue;
+
+ bindCnt = getBindCnt(i);
+
+ if(!bindCnt)
+ continue;
+
+ //first scan get the average coverage by longer pe
+ num5 = num3 = peCounter = linkCounter = 0;
+ scafLen = contig_array[i].length;
+ ctg = i;
+ *(unsigned int *)darrayPut(scaf5, num5++) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg(i)].flag = 1;
+
+ // walk away from contig i; scaf5 receives the contigs in visiting order
+ while(bindCnt)
+ {
+ if(!bindCnt->bySmall)
+ break;
+
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += contig_array[ctg].length;
+ *(unsigned int *)darrayPut(scaf5, num5++) = ctg;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ // walk away from the twin of contig i; scaf3 receives the twins of the
+ // visited contigs
+ ctg = getTwinCtg(i);
+ bindCnt = getBindCnt(ctg);
+
+ while(bindCnt)
+ {
+ if(!bindCnt->bySmall)
+ break;
+
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += contig_array[ctg].length;
+ bal_ctg = getTwinCtg(ctg);
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ *(unsigned int *)darrayPut(scaf3, num3++) = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ if(linkCounter < 1 || scafLen < 5000)
+ continue;
+
+ avgPE = peCounter / linkCounter;
+
+ if(avgPE < 10)
+ continue;
+
+ // tempArray = scaf3 reversed, followed by scaf5: the whole run in one
+ // orientation
+ tempCounter = 0;
+
+ for(j = num3 - 1; j >= 0; j--)
+ *(unsigned int *)darrayPut(tempArray, tempCounter++) =
+ *(unsigned int *)darrayGet(scaf3, j);
+
+ for(j = 0; j < num5; j++)
+ *(unsigned int *)darrayPut(tempArray, tempCounter++) =
+ *(unsigned int *)darrayGet(scaf5, j);
+
+ prevCtg = *(unsigned int *)darrayGet(tempArray, 0);
+ weakCnt = NULL;
+ weakPoint = 0;
+ len = contig_array[prevCtg].length;
+
+ // search for the connection with the smallest maxGap; len is the summed
+ // contig length seen so far, used to skip both ends of the run
+ for(t = 1; t < tempCounter; t++)
+ {
+ thisCtg = *(unsigned int *)darrayGet(tempArray, t);
+
+ if(len < 2000)
+ {
+ len += contig_array[thisCtg].length;
+ prevCtg = thisCtg;
+ continue;
+ }
+ else if(len > scafLen - 2000)
+ break;
+
+ len += contig_array[thisCtg].length;
+
+ // skip pairs with different from_vt or decreasing indexInScaf
+ if(contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf)
+ {
+ prevCtg = thisCtg;
+ continue;
+ }
+
+ cnt = getCntBetween(prevCtg, thisCtg);
+
+ if(!weakCnt || weakCnt->maxGap > cnt->maxGap)
+ {
+ weakCnt = cnt;
+ weakPoint = t;
+ }
+
+ prevCtg = thisCtg;
+ }
+
+ // not weak enough: maxGap above both 2 and a fifth of the average
+ if(!weakCnt || (weakCnt->maxGap > 2 && weakCnt->maxGap > avgPE / 5))
+ continue;
+
+ prevCtg = *(unsigned int *)darrayGet(tempArray, weakPoint - 1);
+ thisCtg = *(unsigned int *)darrayGet(tempArray, weakPoint);
+
+ if(contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf)
+ {
+ printf("contig %d and %d not on the same scaff\n", prevCtg, thisCtg);
+ continue;
+ }
+
+ setConnectWP(prevCtg, thisCtg, 1);
+ /*
+ fprintf(stderr,"scaffold len %d, avg long pe cov %d (%ld/%ld)\n",
+ scafLen,avgPE,peCounter,linkCounter);
+ fprintf(stderr,"Weak connect (%d) between %d(%dth of %d) and %d\n"
+ ,weakCnt->maxGap,prevCtg,weakPoint-1,tempCounter,thisCtg);
+ */
+ // set start and end to break down the scaffold
+ int index1, index2;
+ setBreakPoints(tempArray, tempCounter, weakPoint - 1, &index1, &index2);
+ //fprintf(stderr,"break %d ->...-> %d\n",index1,index2);
+ unsigned int start = *(unsigned int *)darrayGet(tempArray, index1);
+ unsigned int finish = *(unsigned int *)darrayGet(tempArray, index2);
+ // side 1 is collected from the twin of start, side 2 from finish
+ int len1 = getScaffold(getTwinCtg(start), scafStack1);
+ int len2 = getScaffold(finish, scafStack2);
+
+ if(len1 < 2000 || len2 < 2000)
+ continue;
+
+ // compare the links of both sides, first with side 1 switched to its
+ // twins, then with side 1 switched back and side 2 switched instead
+ switch2twin(scafStack1);
+ int flag1 = checkScafConsist(scafStack1, scafStack2);
+
+ switch2twin(scafStack1);
+ switch2twin(scafStack2);
+ int flag2 = checkScafConsist(scafStack2, scafStack1);
+
+ if(!flag1 || !flag2)
+ {
+ changeScafBegin(scafStack1, getTwinCtg(start));
+ changeScafEnd(scafStack2, getTwinCtg(finish));
+ //unbind links
+ unsigned int nextCtg = *(unsigned int *)darrayGet(tempArray, index1 + 1);
+ thisCtg = *(unsigned int *)darrayGet(tempArray, index1);
+ cnt = getCntBetween(getTwinCtg(nextCtg), getTwinCtg(thisCtg));
+
+ // detach whatever is chained before index1
+ if(cnt->nextInScaf)
+ {
+ prevCtg = getTwinCtg(cnt->nextInScaf->contigID);
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween(prevCtg, thisCtg);
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = *(unsigned int *)darrayGet(tempArray, index2 - 1);
+ thisCtg = *(unsigned int *)darrayGet(tempArray, index2);
+ cnt = getCntBetween(prevCtg, thisCtg);
+
+ // detach whatever is chained after index2
+ if(cnt->nextInScaf)
+ {
+ nextCtg = cnt->nextInScaf->contigID;
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween(getTwinCtg(nextCtg), getTwinCtg(thisCtg));
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = *(unsigned int *)darrayGet(tempArray, index1);
+
+ // mask every connection inside [index1, index2], in both orientations
+ for(t = index1 + 1; t <= index2; t++)
+ {
+ thisCtg = *(unsigned int *)darrayGet(tempArray, t);
+ cnt = getCntBetween(prevCtg, thisCtg);
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ cnt = getCntBetween(getTwinCtg(thisCtg), getTwinCtg(prevCtg));
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ /*
+ fprintf(stderr,"(%d %d)/(%d %d) ",
+ prevCtg,thisCtg,getTwinCtg(thisCtg),getTwinCtg(prevCtg));
+ */
+ prevCtg = thisCtg;
+ }
+
+ //fprintf(stderr,": BREAKING\n");
+ // counter is only used by the commented-out report at the end
+ counter++;
+ }
+ }
+
+ freeStack(scafStack1);
+ freeStack(scafStack2);
+ // NOTE(review): "existing" in the message below presumably means "exiting"
+ fprintf(stderr, "[%s]existing this function.\n", __FUNCTION__);
+ //printf("Report from checkScaf: %d scaffold segments broken\n",counter);
+}
+
+/*
+ * Returns 1 when no contig, taken together with its twin, occurs more than
+ * once among the first `count` entries of ctgArray, and 0 otherwise.
+ * The contigs' flag fields are used as scratch marks and are left set.
+ */
+static boolean checkSimple(DARRAY *ctgArray, int count)
+{
+    int slot;
+    unsigned int member;
+
+    /* start from a clean slate for every listed contig and its twin */
+    for(slot = 0; slot < count; slot++)
+    {
+        member = *(unsigned int *)darrayGet(ctgArray, slot);
+        contig_array[member].flag = 0;
+        contig_array[getTwinCtg(member)].flag = 0;
+    }
+
+    slot = 0;
+
+    while(slot < count)
+    {
+        member = *(unsigned int *)darrayGet(ctgArray, slot++);
+
+        if(contig_array[member].flag)
+            return 0;   /* seen before, directly or through its twin */
+
+        contig_array[member].flag = 1;
+        contig_array[getTwinCtg(member)].flag = 1;
+    }
+
+    return 1;
+
+}
+
+/*
+ * For every contig, inspect its downward connections that are neither weak
+ * nor deleted; when checkConnect() also finds a connection from the target
+ * back to the contig, mask both contigs.
+ */
+static void checkCircle()
+{
+    unsigned int i;
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        CONNECT *link;
+
+        for(link = contig_array[i].downwardConnect; link; link = link->next)
+        {
+            unsigned int target;
+
+            if(link->weak || link->deleted)
+                continue;
+
+            target = link->contigID;
+
+            if(checkConnect(target, i))
+            {
+                maskContig(i, 1);
+                maskContig(target, 1);
+            }
+        }
+
+    }
+
+    //printf("%d circles removed \n",counter);
+}
diff --git a/fusion/output_scaffold.c b/fusion/output_scaffold.c
new file mode 100644
index 0000000..7c339d0
--- /dev/null
+++ b/fusion/output_scaffold.c
@@ -0,0 +1,76 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+/*
+ * Write "<outfile>.contig.gvz": a digraph with one edge per contig, running
+ * from the contig's from_vt to its to_vt and labelled "index(length)".
+ *
+ * outfile: output path prefix.  The file name is now built with snprintf;
+ *          a prefix that does not fit the name buffer is reported on
+ *          stderr and nothing is written, instead of overflowing the buffer.
+ */
+void output_contig_graph(char *outfile)
+{
+    char name[256];
+    FILE *fp;
+    unsigned int i;
+    int written;
+
+    written = snprintf(name, sizeof(name), "%s.contig.gvz", outfile);
+
+    if(written < 0 || (size_t)written >= sizeof(name))
+    {
+        fprintf(stderr, "[%s]output prefix too long: %s\n", __FUNCTION__, outfile);
+        return;
+    }
+
+    fp = ckopen(name, "w");
+    fprintf(fp, "digraph G{\n");
+    fprintf(fp, "\tsize=\"512,512\";\n");
+
+    for(i = num_ctg; i > 0; i--)
+    {
+        fprintf(fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", contig_array[i].from_vt, contig_array[i].to_vt, i, contig_array[i].length);
+    }
+
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+/*
+ * Write "<outfile>.scaffold.gvz": a digraph with one edge per non-deleted
+ * downward connection.  Each edge is labelled "gapLen(inScaf_weight)",
+ * where inScaf is 1 if the connection has a prevInScaf or nextInScaf link;
+ * masked connections are drawn in red.
+ *
+ * outfile: output path prefix.  The file name is now built with snprintf;
+ *          a prefix that does not fit the name buffer is reported on
+ *          stderr and nothing is written, instead of overflowing the buffer.
+ */
+void output_scaf(char *outfile)
+{
+    char name[256];
+    FILE *fp;
+    unsigned int i;
+    CONNECT *connect;
+    int written;
+
+    written = snprintf(name, sizeof(name), "%s.scaffold.gvz", outfile);
+
+    if(written < 0 || (size_t)written >= sizeof(name))
+    {
+        fprintf(stderr, "[%s]output prefix too long: %s\n", __FUNCTION__, outfile);
+        return;
+    }
+
+    fp = ckopen(name, "w");
+    fprintf(fp, "digraph G{\n");
+    fprintf(fp, "\tsize=\"512,512\";\n");
+
+    for(i = num_ctg; i > 0; i--)
+    {
+        //if(contig_array[i].mask||!contig_array[i].downwardConnect)
+        for(connect = contig_array[i].downwardConnect; connect; connect = connect->next)
+        {
+            int inScaf;
+
+            //if(connect->mask||connect->deleted){
+            if(connect->deleted)
+                continue;
+
+            inScaf = (connect->prevInScaf || connect->nextInScaf) ? 1 : 0;
+
+            if(!connect->mask)
+                fprintf(fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n"
+                        , i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+                        connect->gapLen, inScaf, connect->weight);
+            else
+                fprintf(fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n"
+                        , i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+                        connect->gapLen, inScaf, connect->weight);
+        }
+    }
+
+    fprintf(fp, "}\n");
+    fclose(fp);
+}
+
diff --git a/fusion/potential.c b/fusion/potential.c
new file mode 100644
index 0000000..75f347b
--- /dev/null
+++ b/fusion/potential.c
@@ -0,0 +1,268 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+#include "dfibHeap.h"
+#include "fibHeap.h"
+#include "darray.h"
+
+
+//static CTGinHEAP *ctg4heapArray;
+extern int inputLinks(FILE *fp, int insertS, char *line);
+//unsigned int traverse(unsigned int node,int *far_count,unsigned int *farpath,
+//int *curr_count,unsigned int *currpath,int *used_count,unsigned int *used,int *max_dist,int *node_dist);
+//static int *sub_arr;
+//static int sub_counter=0;
+/*
+ * qsort() comparator that orders ints from largest to smallest.
+ *
+ * Returns a negative value when *a must come first (*a > *b), a positive
+ * value when *b must come first, and 0 when both are equal.  The values are
+ * compared rather than subtracted, so the result is correct for every pair
+ * of ints (a subtraction overflows for operands far apart).
+ */
+int rev_comp (const void *a, const void *b)
+{
+    const int lhs = *(const int *)a;
+    const int rhs = *(const int *)b;
+
+    return (rhs > lhs) - (rhs < lhs);
+}
+/*
+ * Depth-first walk over downward connections, starting at `start`.
+ *
+ * Only connections with weight >= 3 that lead to a contig not yet marked
+ * inSubGraph (neither itself nor its twin) are followed; every contig
+ * reached is marked, together with its twin, and counted in *used.
+ * The distance of a contig is the distance of the contig it was reached
+ * from plus the connection's gapLen plus its own length.
+ *
+ * stack/dist: caller-provided work arrays; a contig is pushed at most once,
+ *             so num_ctg + 2 entries are sufficient.
+ * Returns the largest distance reached (0 if nothing was reached).
+ */
+static int potential_reach(unsigned int start, unsigned int *stack, int *dist, int *used)
+{
+    int top = 0;
+    int farthest = 0;
+
+    stack[top] = start;
+    dist[top] = 0;
+
+    while(top >= 0)
+    {
+        unsigned int node = stack[top];
+        int base_dist = dist[top];
+        CONNECT *cnt;
+
+        top--;
+
+        for(cnt = contig_array[node].downwardConnect; cnt; cnt = cnt->next)   //push all adjacent connect
+        {
+            unsigned int next = cnt->contigID;
+            unsigned int twin = getTwinCtg(next);
+            int reach;
+
+            if(cnt->weight < 3 || contig_array[next].inSubGraph || contig_array[twin].inSubGraph)
+                continue;
+
+            contig_array[next].inSubGraph = 1;
+            contig_array[twin].inSubGraph = 1;
+            fprintf(stderr, "[%d] traversed %d %d .\n", __LINE__, next, twin);
+            ++*used;
+
+            reach = base_dist + cnt->gapLen + contig_array[next].length;
+            ++top;
+            stack[top] = next;
+            dist[top] = reach;
+
+            if(reach > farthest)
+                farthest = reach;
+        }
+    }
+
+    return farthest;
+}
+
+/*
+ * Load the graph and the "<graphfile>.links" file, then estimate for every
+ * group of connected contigs the length that could be spanned, and print
+ * the sorted estimates together with their N50.
+ *
+ * Changes compared with the previous version:
+ *  - the second walk starts at the twin of the contig (it used to restart
+ *    at the contig itself and add the first walk's distance a second time);
+ *  - the work arrays are sized from num_ctg and released at the end;
+ *  - the N50 line no longer reads past the estimates when there are none.
+ */
+void potential()
+{
+
+    char name[256], *line;
+    FILE *fp;
+    int i;
+    int written;
+    unsigned int *curr_path;
+    int *dist;
+    int *predict;
+    int pred_count = 0;
+    int used = 0;
+    int n50 = 0;
+    long long int sum = 0;
+    long long int half;
+
+    loadUpdatedEdges(graphfile);
+
+    if(!pes)
+        loadPEgrads(graphfile);
+
+    written = snprintf(name, sizeof(name), "%s.links", graphfile);
+
+    if(written < 0 || (size_t)written >= sizeof(name))
+    {
+        fprintf(stderr, "[%s]graph prefix too long: %s\n", __FUNCTION__, graphfile);
+        return;
+    }
+
+    fp = ckopen(name, "r");
+
+    createCntMemManager();
+    createCntLookupTable();
+
+    lineLen = 1024;
+    line = (char *)ckalloc(lineLen * sizeof(char));
+
+    /* the first line of the links file is read and discarded */
+    if(fgets(line, lineLen, fp) == NULL)
+        fprintf(stderr, "[%s]%s is empty.\n", __FUNCTION__, name);
+
+    line[0] = '\0';
+    fprintf(stderr, "[%s]before inputLinks loop.\n", __FUNCTION__);
+
+    for(i = 0; i < gradsCounter; i++)
+        (void)inputLinks(fp, pes[i].insertS, line);
+
+    fprintf(stderr, "[%s]links file loaded.\n", __FUNCTION__);
+
+    /* every contig is pushed / predicted at most once */
+    curr_path = ckalloc(((size_t)num_ctg + 2) * sizeof(unsigned int));
+    dist = ckalloc(((size_t)num_ctg + 2) * sizeof(int));
+    predict = ckalloc(((size_t)num_ctg + 2) * sizeof(int));
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        int twin;
+        int len;
+
+        if(contig_array[i].inSubGraph)
+            continue;
+
+        twin = getTwinCtg(i);
+        contig_array[i].inSubGraph = 1;
+        contig_array[twin].inSubGraph = 1;
+        fprintf(stderr, "[%d] traversed %d %d .\n", __LINE__, i, twin);
+        ++used;
+
+        /* a contig without downward connections counts with its own length */
+        if(!contig_array[i].downwardConnect)
+        {
+            predict[pred_count++] = contig_array[i].length;
+            continue;
+        }
+
+        //depth first traversal, once from the contig and once from its twin
+        len = potential_reach(i, curr_path, dist, &used);
+
+        if(contig_array[twin].downwardConnect)
+            len += potential_reach(twin, curr_path, dist, &used);
+
+        if(len != 0)
+        {
+            predict[pred_count++] = len;
+            fprintf(stderr, "[%s]contig %d effective with length %d.\n", __FUNCTION__, i, len);
+        }
+
+        fprintf(stderr, "[%s]contig %d over.\n", __FUNCTION__, i);
+    }
+
+    free((void *)line);
+    fclose(fp);
+
+    for(i = 0; i < pred_count; ++i)
+        sum += predict[i];
+
+    half = sum / 2;
+    printf("sum %lld , half %lld.\n", sum, half);
+    qsort(predict, pred_count, sizeof(int), rev_comp);
+
+    for(i = 0; i < pred_count; ++i)
+        printf("len:\t%d\n", predict[i]);
+
+    /* N50: the estimate at which the running total first reaches half */
+    sum = 0;
+
+    for(i = 0; i < pred_count; ++i)
+    {
+        sum += predict[i];
+        printf("len:\t%d\n", predict[i]);
+
+        if(sum >= half)
+        {
+            n50 = predict[i];
+            break;
+        }
+    }
+
+    printf("N50 %d , half %lld.\n", n50, half);
+    printf("used contig %d", used);
+
+    free((void *)curr_path);
+    free((void *)dist);
+    free((void *)predict);
+}
+
+/*
+unsigned int traverse(unsigned int node,int *far_count,unsigned int *farpath,
+ int *curr_count,unsigned int *currpath,int *used_count,unsigned int *used,int *max_dist,int *node_dist){
+ unsigned int bal = getTwinCtg(node);
+
+ currpath[(*curr_count)++]=node;
+ used[(*used_count)++]=node;
+ used[(*used_count)++]=bal;
+ contig_array[node].inSubGraph=1;
+ contig_array[bal].inSubGraph=1;
+
+ fprintf(stderr,"farcount %d curr_count %d node_dist %d max_dist %d.\n",*far_count,*curr_count,*node_dist,*max_dist);
+ CONNECT *tmp_cnt=contig_array[node].downwardConnect;
+ while(tmp_cnt){
+ unsigned int ctg,bal_ctg;
+ ctg=tmp_cnt->contigID;
+ bal_ctg=getTwinCtg(ctg);
+ if(contig_array[ctg].inSubGraph||contig_array[bal_ctg].inSubGraph
+ ||contig_array[ctg].flag||contig_array[bal_ctg].flag){
+ tmp_cnt=tmp_cnt->next;
+ continue;
+ }
+ *node_dist+=(tmp_cnt->gapLen+contig_array[ctg].length);
+ if(*node_dist>*max_dist){
+ int i;
+ for(i=0;i<*curr_count;++i){
+ farpath[i]=currpath[i];
+ }
+ *far_count=*curr_count;
+ *max_dist=*node_dist+tmp_cnt->gapLen;
+ }
+ traverse(tmp_cnt->contigID,far_count,farpath,curr_count,currpath,used_count,used,max_dist,node_dist);
+ *node_dist-=(tmp_cnt->gapLen+contig_array[ctg].length);
+ tmp_cnt=tmp_cnt->next;
+ }
+ --(*curr_count);
+
+ return 0;
+}
+*/
diff --git a/fusion/prepare.c b/fusion/prepare.c
new file mode 100644
index 0000000..8df6ea9
--- /dev/null
+++ b/fusion/prepare.c
@@ -0,0 +1,254 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+#include "ctype.h"
+boolean upper_rev(char *in, int in_len);
+void print_seq(FILE *out_file, char *sequence , int sequence_len);
+// Complement lookup table indexed by the ASCII code of an upper-case base:
+// 'A'(65) -> 'T', 'C'(67) -> 'G', 'G'(71) -> 'C', 'N'(78) -> 'N',
+// 'T'(84) -> 'A'; every other entry is 0.
+// The table has 110 entries, so only codes 0..109 may be used as an index.
+// The trailing number on each row is the index of the row's first entry.
+char rev[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //0
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //10
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //20
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //30
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //40
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //50
+ 0, 0, 0, 0, 0, 'T', 0, 'G', 0, 0, //60
+ 0, 'C', 0, 0, 0, 0, 0, 0, 'N', 0, //70
+ 0, 0, 0, 0, 'A', 0, 0, 0, 0, 0, //80
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //90
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ };//100
+// One input contig as held in memory by data_prepare().
+typedef struct io_ctg
+{
+ char *seq; // heap copy of the sequence, taken after upper_rev() upper-cased it
+ int len; // number of bases in seq
+ int bal; // 1 if upper_rev() returned non-zero for the sequence, otherwise 2
+ char *name; // heap copy of the name read from the contig's '>' line
+} IO_CTG;
+
+/*
+ * qsort() comparator ordering IO_CTG records by ascending len.
+ * The lengths are compared rather than subtracted so the result cannot
+ * overflow, whatever values the two fields hold.
+ */
+static int cmp_ctg(const void *a, const void *b)
+{
+    const IO_CTG *lhs = (const IO_CTG *)a;
+    const IO_CTG *rhs = (const IO_CTG *)b;
+
+    return (lhs->len > rhs->len) - (lhs->len < rhs->len);
+}
+
+/* Capacities of the work buffers used by data_prepare(). */
+enum
+{
+    DATA_PREPARE_LINE_CAP = 100000000,   /* longest input line, in bytes */
+    DATA_PREPARE_SEQ_CAP = 1000000000,   /* longest contig, in bytes */
+    DATA_PREPARE_CTG_CAP = 1000000000    /* slots in the contig table */
+};
+
+/*
+ * Upper-case `seq`, test it against its reverse complement and store a copy
+ * of it, its length and its name in `slot`.
+ *
+ * Returns the number of edge ids the contig occupies: 2 for an ordinary
+ * contig (forward and reverse strand), 1 for one that equals its own
+ * reverse complement.
+ */
+static int data_prepare_store(IO_CTG *slot, char *seq, int len, const char *name)
+{
+    boolean palindrome = upper_rev(seq, len);
+
+    /* the extra 100 bytes are headroom for line-wise printing of the copy */
+    slot->seq = (char *)ckalloc((len + 100) * sizeof(char));
+    strcpy(slot->seq, seq);
+    slot->len = len;
+    slot->bal = palindrome ? 1 : 2;
+    /* + 1 for the terminating NUL */
+    slot->name = (char *)malloc(strlen(name) + 1);
+
+    if(!slot->name)
+    {
+        fprintf(stderr, "data_prepare: out of memory.\n");
+        exit(1);
+    }
+
+    strcpy(slot->name, name);
+    return slot->bal;
+}
+
+/*
+ * Convert the FASTA file `ctg_file` into the files a later stage loads:
+ * <graphfile>.preGraphBasic, .contig, .conver, .updated.edge, .ContigIndex
+ * and an empty .Arc.  Contigs shorter than overlaplen are dropped; the
+ * remaining ones are written sorted by ascending length.
+ *
+ * Returns 0.
+ *
+ * Fixes compared with the previous version:
+ *  - the name of the last contig was copied into a buffer one byte short;
+ *  - the header of a self-complementary contig printed the length of the
+ *    previously written contig;
+ *  - the last contig was dropped when its length equalled overlaplen,
+ *    unlike every other contig;
+ *  - a final line without newline lost its last base; '\r' is stripped too;
+ *  - names and paths are read/formatted with bounded widths;
+ *  - all files are closed and all buffers released before returning.
+ */
+int data_prepare()
+{
+    char file_name[256];
+    char orig_name[1024];
+    FILE *basic, *ctg_fp, *update, *index, *new_ctg, *conver;
+    char *line, *seq;
+    IO_CTG *pre_ctg;
+    int cul_id = 1;
+    int total = 0;
+    int len = 0;
+    int i;
+
+    snprintf(file_name, sizeof(file_name), "%s.preGraphBasic", graphfile);
+    basic = ckopen(file_name, "w");
+    fprintf(basic, "VERTEX 605681 K %d", overlaplen);
+    fprintf(basic, "\nEDGEs 1861091\n\nMaxReadLen 100 MinReadLen 0 MaxNameLen 256\n");
+    fclose(basic);
+
+    ctg_fp = ckopen(ctg_file, "r");
+    snprintf(file_name, sizeof(file_name), "%s.contig", graphfile);
+    new_ctg = ckopen(file_name, "w");
+    snprintf(file_name, sizeof(file_name), "%s.conver", graphfile);
+    conver = ckopen(file_name, "w");
+
+    line = (char *)ckalloc((size_t)DATA_PREPARE_LINE_CAP * sizeof(char));
+    pre_ctg = (IO_CTG *)ckalloc((size_t)DATA_PREPARE_CTG_CAP * sizeof(IO_CTG));
+    seq = (char *)malloc((size_t)DATA_PREPARE_SEQ_CAP * sizeof(char));
+
+    if(!seq)
+    {
+        fprintf(stderr, "data_prepare: out of memory.\n");
+        exit(1);
+    }
+
+    seq[0] = '\0';
+    orig_name[0] = '\0';
+
+    /* the first line names the first contig */
+    if(fgets(line, DATA_PREPARE_LINE_CAP, ctg_fp) != NULL)
+        sscanf(line, ">%1023s", orig_name);
+
+    while(fgets(line, DATA_PREPARE_LINE_CAP, ctg_fp) != NULL)
+    {
+        if(line[0] == '>')
+        {
+            /* a new header ends the contig collected so far */
+            if(len >= overlaplen)
+            {
+                if(total + 1 >= DATA_PREPARE_CTG_CAP)
+                {
+                    fprintf(stderr, "data_prepare: too many contigs.\n");
+                    exit(1);
+                }
+
+                cul_id += data_prepare_store(&pre_ctg[++total], seq, len, orig_name);
+            }
+
+            sscanf(line, ">%1023s", orig_name);
+            seq[0] = '\0';
+            len = 0;
+        }
+        else
+        {
+            /* strip the line terminator only; it may be absent on the last line */
+            size_t chunk = strcspn(line, "\r\n");
+
+            line[chunk] = '\0';
+
+            if((size_t)len + chunk + 1 > (size_t)DATA_PREPARE_SEQ_CAP)
+            {
+                fprintf(stderr, "data_prepare: contig %s is too long.\n", orig_name);
+                exit(1);
+            }
+
+            memcpy(&seq[len], line, chunk + 1);
+            len += (int)chunk;
+        }
+
+    }
+
+    /* the last contig has no following header to trigger its storage */
+    if(len >= overlaplen && total + 1 < DATA_PREPARE_CTG_CAP)
+        cul_id += data_prepare_store(&pre_ctg[++total], seq, len, orig_name);
+
+    fprintf(stderr, "All contigs loaded.\n");
+    snprintf(file_name, sizeof(file_name), "%s.updated.edge", graphfile);
+    update = ckopen(file_name, "w");
+    snprintf(file_name, sizeof(file_name), "%s.ContigIndex", graphfile);
+    index = ckopen(file_name, "w");
+    /* NOTE(review): cul_id started at 1, so this is the number of edge ids
+     * plus one -- kept as before; confirm against the loader. */
+    fprintf(update, "EDGEs %d\n", cul_id);
+    fprintf(index, "Edge_num %d %d\nindex\tlength\treverseComplement\n", cul_id, total);
+    qsort(&pre_ctg[1], total, sizeof(IO_CTG), cmp_ctg);
+
+    cul_id = 0;
+
+    for(i = 1; i <= total; ++i)
+    {
+        len = pre_ctg[i].len;
+        fprintf(new_ctg, ">%d length %d\n", ++cul_id, len);
+        print_seq(new_ctg, pre_ctg[i].seq, len);
+        fprintf(conver, "%s\t%d\t%d\n", pre_ctg[i].name, cul_id, len);
+
+        if(pre_ctg[i].bal == 2)
+        {
+            /* two strands: two edge records, and the twin's id is skipped */
+            fprintf(update, ">length %d,1,8\n", len);
+            fprintf(update, ">length %d,-1,8\n", len);
+            fprintf(index, "%d\t%d\t1\n", cul_id++, len);
+        }
+        else
+        {
+            if(overlaplen <= 31)
+            {
+                fprintf(update, ">length %d,fffffffffff,fffffffffff,0,8\n", len);
+            }
+            else
+            {
+                fprintf(update, ">length %d,0,8\n", len);
+            }
+
+            fprintf(index, "%d\t%d\t0\n", cul_id, len);
+        }
+    }
+
+    for(i = 1; i <= total; ++i)
+    {
+        free((void *)pre_ctg[i].seq);
+        free((void *)pre_ctg[i].name);
+    }
+
+    free((void *)pre_ctg);
+    free((void *)seq);
+    free((void *)line);
+    fclose(ctg_fp);
+    fclose(new_ctg);
+    fclose(conver);
+    fclose(update);
+    fclose(index);
+
+    /* NOTE(review): graphfile is passed to the shell unquoted */
+    snprintf(file_name, sizeof(file_name), "touch %s.Arc", graphfile);
+    system(file_name);
+    return 0;
+}
+
+/*
+ * Upper-case the first in_len characters of `in` (in place) and test
+ * whether the sequence equals its own reverse complement.
+ *
+ * Returns 1 if it does, 0 if it does not.  An empty sequence yields 1.
+ *
+ * The walk now includes the middle base of an odd-length sequence, which
+ * must be its own complement (only 'N' is).  Characters are passed to
+ * toupper() as unsigned char, and codes outside the complement table are
+ * treated as having no complement instead of indexing past the table.
+ */
+boolean upper_rev(char *in, int in_len)
+{
+    boolean ret_flag = 1;
+    int left, right;
+
+    for(left = 0, right = in_len - 1; left <= right; ++left, --right)
+    {
+        unsigned char code;
+        char complement;
+
+        in[left] = (char)toupper((unsigned char)in[left]);
+        in[right] = (char)toupper((unsigned char)in[right]);
+
+        code = (unsigned char)in[right];
+        complement = (code < sizeof(rev)) ? rev[code] : 0;
+
+        if(in[left] != complement)
+        {
+            ret_flag = 0;
+        }
+    }
+
+    return ret_flag;
+}
+
+/*
+ * Write `sequence` to out_file, 100 characters per line.
+ *
+ * sequence_len: number of characters to write.
+ *
+ * The sequence is no longer modified while printing and nothing beyond
+ * sequence[sequence_len] is touched (the previous version temporarily
+ * wrote a terminator at the next multiple of 100, which lies outside the
+ * buffer for lengths that are a multiple of 100).  The output is unchanged:
+ * as before, a length that is a multiple of 100 -- including 0 -- is
+ * followed by one empty line.
+ */
+void print_seq(FILE *out_file, char *sequence , int sequence_len)
+{
+    int offset = 0;
+
+    do
+    {
+        int chunk = sequence_len - offset;
+
+        if(chunk > 100)
+            chunk = 100;
+        else if(chunk < 0)
+            chunk = 0;
+
+        fprintf(out_file, "%.*s\n", chunk, sequence + offset);
+        offset += 100;
+    }
+    while(offset <= sequence_len);
+
+}
diff --git a/fusion/prlHashCtg.c b/fusion/prlHashCtg.c
new file mode 100644
index 0000000..f779081
--- /dev/null
+++ b/fusion/prlHashCtg.c
@@ -0,0 +1,387 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+ //debugging variables
+ static long long *kmerCounter; // [0]: kmers chopped from contigs; [id + 1]: kmers inserted by thread id
+
+ //buffer related variables for chopping kmers
+ static unsigned int read_c; // number of sequences currently held in the buffers
+ static char **rcSeq; // rcSeq[threadID]: per-thread scratch buffer for the reverse complement
+ static char *seqBuffer; // all buffered sequences, stored back to back
+ static int *lenBuffer; // lenBuffer[t]: length of buffered sequence t
+ static unsigned int *indexArray; // indexArray[t]: slot in kmerBuffer of the first kmer of sequence t
+ static unsigned int *seqBreakers; // seqBreakers[t]: offset of sequence t inside seqBuffer
+ static int *ctgIdArray; // ctgIdArray[t]: contig ID recorded for buffered sequence t
+ static Kmer *firstKmers;
+
+ //buffer related variables for inserting kmers into the kmer sets
+ static unsigned int buffer_size = 10000000; // capacity (in kmers) of kmerBuffer, hashBanBuffer and smallerBuffer
+ static unsigned int seq_buffer_size; // capacity of seqBuffer; set to buffer_size * 2
+ static unsigned int max_read_c; // capacity (in sequences) of the per-sequence arrays
+ static volatile unsigned int kmer_c; // number of kmers expected in the buffers for the current batch
+ static Kmer *kmerBuffer, *hashBanBuffer; // smaller of each kmer and its reverse complement, and its hash_kmer() value
+ static boolean *smallerBuffer; // 1 when the forward kmer was the smaller one, 0 otherwise
+
+ static void singleKmer(int t, KmerSet *kset,
+ unsigned int seq_index, unsigned int pos);
+ static void chopKmer4read(int t, int threadID);
+
+ /*
+  * Worker-thread main loop.  The thread polls its own signal byte
+  * (prm->selfSignal) and reacts to the value it finds there:
+  *   1 - insert the buffered kmers whose hash maps to this thread,
+  *   2 - chop this thread's share of the buffered sequences into kmers,
+  *   3 - leave the loop.
+  * The signal byte is reset to 0 once the requested work is done.
+  */
+ static void threadRoutine(void *para)
+ {
+     PARAMETER *self = (PARAMETER *)para;
+     unsigned char me = self->threadID;
+     unsigned int k;
+
+     for(;;)
+     {
+         if(*(self->selfSignal) == 1)
+         {
+             unsigned int seq = 0;    // sequence the current kmer belongs to
+             unsigned int offset = 0; // position of the kmer inside that sequence
+
+             for(k = 0; k < kmer_c; k++)
+             {
+                 if(seq < read_c && indexArray[seq + 1] == k)
+                 {
+                     seq++;
+                     offset = 0;
+                 }
+
+                 // only kmers whose hash selects this thread are inserted here
+                 if((unsigned char)(hashBanBuffer[k] % thrd_num) == me)
+                 {
+                     kmerCounter[me + 1]++;
+                     singleKmer(k, KmerSets[me], seq, offset);
+                 }
+
+                 offset++;
+             }
+
+             *(self->selfSignal) = 0;
+         }
+         else if(*(self->selfSignal) == 2)
+         {
+             for(k = 0; k < read_c; k++)
+             {
+                 if(k % thrd_num == me)
+                     chopKmer4read(k, me + 1);
+             }
+
+             *(self->selfSignal) = 0;
+         }
+         else if(*(self->selfSignal) == 3)
+         {
+             *(self->selfSignal) = 0;
+             break;
+         }
+
+         usleep(1);
+     }
+ }
+
+ /*
+  * Put buffered kmer `t` into `kset`.
+  *
+  * When put_kmerset() returns zero the node is stamped with the strand flag,
+  * the contig ID of sequence `seq_index` (l_links) and the kmer position
+  * `pos` (r_links).  When it returns non-zero the node is marked deleted
+  * (presumably because the kmer was already present -- confirm against
+  * put_kmerset).
+  */
+ static void singleKmer(int t, KmerSet *kset,
+                        unsigned int seq_index, unsigned int pos)
+ {
+     kmer_t *entry;
+     boolean status = put_kmerset(kset, kmerBuffer[t], 4, 4, &entry);
+
+     if(status)
+     {
+         entry->deleted = 1;
+         return;
+     }
+
+     entry->twin = smallerBuffer[t] ? 0 : 1;
+     entry->l_links = ctgIdArray[seq_index];
+     entry->r_links = pos;
+ }
+
+ /*
+  * Start thrd_num worker threads running threadRoutine(); thread i receives
+  * &paras[i] as its argument.  The process exits with status 1 when a thread
+  * cannot be created.
+  *
+  * The loop counter is an int: the former unsigned char counter wrapped
+  * around at 256 and could never reach a larger thrd_num.
+  */
+ static void creatThrds(pthread_t *threads, PARAMETER *paras)
+ {
+     int i;
+
+     for(i = 0; i < thrd_num; i++)
+     {
+         if(pthread_create(&threads[i], NULL, (void *)threadRoutine, &(paras[i])) != 0)
+         {
+             printf("create threads failed\n");
+             exit(1);
+         }
+     }
+ }
+
+ /* Join each of the thrd_num worker threads whose handle is non-zero. */
+ static void thread_wait(pthread_t *threads)
+ {
+     int n = 0;
+
+     while(n < thrd_num)
+     {
+         if(threads[n] != 0)
+         {
+             pthread_join(threads[n], NULL);
+         }
+
+         n++;
+     }
+ }
+
+ /*
+  * Chop buffered sequence `t` into overlaplen-mers.
+  *
+  * For every kmer the smaller of the forward word and its reverse complement
+  * is stored in kmerBuffer, its hash_kmer() value in hashBanBuffer, and
+  * smallerBuffer records whether the forward word was the smaller one (1) or
+  * not (0).  Output slots start at indexArray[t]; rcSeq[threadID] is used as
+  * scratch space for the reverse-complemented sequence.
+  */
+ static void chopKmer4read(int t, int threadID)
+ {
+     char *fwd = seqBuffer + seqBreakers[t];
+     char *rc = rcSeq[threadID];
+     int seqLen = lenBuffer[t];
+     int slot, step, mirror;
+     Kmer word = 0, bal_word;
+
+     // pack the first overlaplen bases, two bits each
+     for(step = 0; step < overlaplen; step++)
+     {
+         word <<= 2;
+         word += fwd[step];
+     }
+
+     reverseComplementSeq(fwd, seqLen, rc);
+     bal_word = reverseComplement(word, overlaplen);
+     slot = indexArray[t];
+     step = 0;
+
+     for(;;)
+     {
+         int fwdIsSmaller = word < bal_word;
+         Kmer canonical = fwdIsSmaller ? word : bal_word;
+
+         kmerBuffer[slot] = canonical;
+         hashBanBuffer[slot] = hash_kmer(canonical);
+         smallerBuffer[slot] = fwdIsSmaller ? 1 : 0;
+         slot++;
+
+         step++;
+
+         if(!(step <= seqLen - overlaplen))
+             break;
+
+         // slide both strands by one base
+         word = nextKmer(word, fwd[step - 1 + overlaplen]);
+         mirror = seqLen - step - overlaplen;
+         bal_word = prevKmer(bal_word, rc[mirror]);
+     }
+ }
+
+ /*
+  * Post `SIG` into the signal byte of every worker (thrdSignals[1..thrd_num])
+  * and poll every 10 microseconds until all of those bytes read 0 again.
+  */
+ static void sendWorkSignal(unsigned char SIG, unsigned char *thrdSignals)
+ {
+     int slot;
+     int busy;
+
+     for(slot = 1; slot <= thrd_num; slot++)
+         thrdSignals[slot] = SIG;
+
+     do
+     {
+         usleep(10);
+         busy = 0;
+
+         for(slot = 1; slot <= thrd_num; slot++)
+         {
+             if(thrdSignals[slot])
+             {
+                 busy = 1;
+                 break;
+             }
+         }
+     }
+     while(busy);
+ }
+
+ /*
+  * Return the number at the start of `name` as parsed by atoi(), or 0 when
+  * the first character of `name` is not a decimal digit.
+  */
+ static int getID(char *name)
+ {
+     char first = name[0];
+
+     return (first >= '0' && first <= '9') ? atoi(name) : 0;
+ }
+
+ /*
+  * Read every contig from "<grapfile>.contig", chop it into overlaplen-mers
+  * and insert the kmers into the per-thread kmer sets KmerSets[0..thrd_num-1].
+  *
+  * grapfile - prefix of the contig file.
+  * len_cut  - contigs shorter than this (or shorter than overlaplen + 1) are
+  *            skipped.
+  * Always returns 1.
+  *
+  * Contigs are collected into batches; for each full batch the workers are
+  * asked first to chop the sequences (signal 2) and then to insert the kmers
+  * (signal 1).  Each inserted kmer is tagged by singleKmer() with the contig
+  * ID and its position in the contig.
+  *
+  * The contig ID is the leading number of the FASTA name; when the name does
+  * not start with a positive number the 1-based ordinal `i` of the sequence
+  * in the file is used instead.  `i` is now advanced for every sequence read:
+  * its only increment used to live inside a commented-out progress message,
+  * so the fallback ID was always 0.
+  */
+ boolean prlContig2nodes(char *grapfile, int len_cut)
+ {
+     long long i;
+     char name[256], *next_name;
+     FILE *fp;
+     pthread_t threads[thrd_num];
+     unsigned char thrdSignal[thrd_num + 1];
+     PARAMETER paras[thrd_num];
+     int maxCtgLen, minCtgLen, nameLen;
+     unsigned int lenSum, contigId;
+
+     WORDFILTER = (((Kmer) 1) << (2 * overlaplen)) - 1;
+
+     // first pass over the file: only the length statistics are needed
+     snprintf(name, sizeof(name), "%s.contig", grapfile);
+     fp = ckopen(name, "r");
+     maxCtgLen = nameLen = 10;
+     minCtgLen = 1000;
+     (void)readseqpar(&maxCtgLen, &minCtgLen, &nameLen, fp);
+     maxReadLen = maxCtgLen;
+     fclose(fp);
+
+     next_name = (char *)ckalloc((maxNameLen + 1) * sizeof(char));
+
+     seq_buffer_size = buffer_size * 2;
+     max_read_c = seq_buffer_size / 20;
+
+     kmerBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+     hashBanBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+     smallerBuffer = (boolean *)ckalloc(buffer_size * sizeof(boolean));
+
+     seqBuffer = (char *)ckalloc(seq_buffer_size * sizeof(char));
+     lenBuffer = (int *)ckalloc(max_read_c * sizeof(int));
+     indexArray = (unsigned int *)ckalloc((max_read_c + 1) * sizeof(unsigned int));
+     seqBreakers = (unsigned int *)ckalloc((max_read_c + 1) * sizeof(unsigned int));
+     ctgIdArray = (int *)ckalloc(max_read_c * sizeof(int));
+
+     // second pass: actually load the sequences
+     fp = ckopen(name, "r");
+     rcSeq = (char **)ckalloc((thrd_num + 1) * sizeof(char *));
+     kmerCounter = (long long *)ckalloc((thrd_num + 1) * sizeof(long long));
+     KmerSets = (KmerSet **)ckalloc(thrd_num * sizeof(KmerSet *));
+
+     for(i = 0; i < thrd_num; i++)
+     {
+         KmerSets[i] = init_kmerset(1024, 0.77f);
+         thrdSignal[i + 1] = 0;
+         paras[i].threadID = i;
+         paras[i].mainSignal = &thrdSignal[0];
+         paras[i].selfSignal = &thrdSignal[i + 1];
+         kmerCounter[i + 1] = 0;
+         rcSeq[i + 1] = (char *)ckalloc(maxCtgLen * sizeof(char));
+     }
+
+     creatThrds(threads, paras);
+
+     kmer_c = thrdSignal[0] = kmerCounter[0] = 0;
+     read_c = lenSum = i = seqBreakers[0] = indexArray[0] = 0;
+
+     // prime next_name with the header of the first sequence
+     readseq1by1(seqBuffer + seqBreakers[read_c], next_name, &(lenBuffer[read_c]), fp, -1);
+
+     while(!feof(fp))
+     {
+         // next_name still holds the header of the sequence about to be read
+         contigId = getID(next_name);
+         readseq1by1(seqBuffer + seqBreakers[read_c], next_name, &(lenBuffer[read_c]), fp, 1);
+         ++i; // ordinal of this sequence, used as ID for non-numeric names
+
+         if(lenBuffer[read_c] < overlaplen + 1 || lenBuffer[read_c] < len_cut)
+             continue;
+
+         ctgIdArray[read_c] = contigId > 0 ? contigId : i;
+         lenSum += lenBuffer[read_c];
+         kmer_c += lenBuffer[read_c] - overlaplen + 1;
+         read_c++;
+         seqBreakers[read_c] = lenSum;
+         indexArray[read_c] = kmer_c;
+
+         // flush once another maximum-length contig might not fit any more
+         if(read_c == max_read_c || (lenSum + maxCtgLen) > seq_buffer_size || (kmer_c + maxCtgLen - overlaplen + 1) > buffer_size)
+         {
+             kmerCounter[0] += kmer_c;
+             sendWorkSignal(2, thrdSignal);
+             sendWorkSignal(1, thrdSignal);
+
+             kmer_c = read_c = lenSum = 0;
+         }
+     }
+
+     if(read_c)
+     {
+         kmerCounter[0] += kmer_c;
+         sendWorkSignal(2, thrdSignal);
+         sendWorkSignal(1, thrdSignal);
+     }
+
+     sendWorkSignal(3, thrdSignal);
+     thread_wait(threads);
+
+     {
+         unsigned long long alloCounter = 0;
+         unsigned long long allKmerCounter = 0;
+
+         for(i = 0; i < thrd_num; i++)
+         {
+             alloCounter += count_kmerset((KmerSets[i]));
+             allKmerCounter += kmerCounter[i + 1];
+             free((void *)rcSeq[i + 1]);
+         }
+
+         printf("[%s]%lli nodes allocated, %lli kmer in contigs, %lli kmer processed\n"
+                , __FUNCTION__, alloCounter, kmerCounter[0], allKmerCounter);
+     }
+
+     free((void *)rcSeq);
+     free((void *)kmerCounter);
+
+     free((void *)seqBuffer);
+     free((void *)lenBuffer);
+     free((void *)indexArray);
+     free((void *)seqBreakers);
+     free((void *)ctgIdArray);
+
+     free((void *)kmerBuffer);
+     free((void *)hashBanBuffer);
+     free((void *)smallerBuffer);
+
+     free((void *)next_name);
+     fclose(fp);
+
+     return 1;
+ }
diff --git a/fusion/prlRead2Ctg.c b/fusion/prlRead2Ctg.c
new file mode 100644
index 0000000..0964b4c
--- /dev/null
+++ b/fusion/prlRead2Ctg.c
@@ -0,0 +1,1090 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+ static ubyte binLight[16384]; // binLight[v]: number of set bits in v; filled in prlRead2Ctg()
+ static ubyte probableMatrix[4][4] =
+ {
+ // A C T G 7 3 2 1
+ {7, 2, 1, 3}, // A->A G C T
+ {2, 7, 3, 1}, // C->C T A G
+ {1, 3, 7, 2}, // T->T C G A
+ {3, 1, 2, 7} // G->G A T C
+ };
+
+ static ubyte2 doubleBitMasker[7] =
+ {
+ 0x3, //000000 00000011
+ 0xC, //000000 00001100
+ 0x30, //000000 00110000
+ 0xC0, //000000 11000000
+ 0x300, //000011 00000000
+ 0xC00, //001100 00000000
+ 0x3000 //110000 00000000
+ };
+
+ static boolean staticFlag = 1;
+
+ static long long readsInGap = 0; // incremented once per output1read() call
+
+ static int buffer_size = 10000000; // capacity (in kmers) of kmerBuffer, hashBanBuffer, nodeBuffer and smallerBuffer
+ static long long readCounter; // reads seen by recordAlldgn()/recordLongRead()
+ static long long mapCounter; // reads written to the readOnContig file
+ long long single_count;
+ long long single_map;
+ static int ALIGNLEN = 0; // cap on the read length used by parse1read() to derive its hit threshold
+ //buffer related variables for chopping kmers
+ static int read_c; // number of reads currently held in the buffers
+ static char **rcSeq; // rcSeq[threadID]: per-thread scratch buffer
+ static char **seqBuffer; // seqBuffer[t]: bases of buffered read t
+ static int *lenBuffer; // lenBuffer[t]: length of buffered read t
+ static unsigned int *ctgIdArray; // ctgIdArray[t]: contig read t was placed on, 0 when none
+ static int *posArray; // posArray[t]: position of read t on that contig
+ static char *orienArray; // orienArray[t]: '+' or '-'
+ static char *footprint; // flag indicates whether the read should leave markers on contigs
+
+ // kmer related variables
+ static int kmer_c; // number of kmers expected in the buffers for the current batch
+ static Kmer *kmerBuffer, *hashBanBuffer; // smaller of each kmer and its reverse complement, and its hash_kmer() value
+ static kmer_t **nodeBuffer; // nodeBuffer[k]: node found for kmer k by searchKmer(), or NULL
+ static boolean *smallerBuffer; // 1 when the forward kmer was the smaller one, 0 otherwise
+ static unsigned int *indexArray; // indexArray[t]: slot in kmerBuffer of the first kmer of read t
+
+ static int *deletion; // deletion[threadID]: per-thread counter incremented in parse1read()
+
+ static void parse1read(int t, int threadID);
+ static void threadRoutine(void *thrdID);
+ static void searchKmer(int t, KmerSet *kset);
+ static void chopKmer4read(int t, int threadID);
+ static void thread_wait(pthread_t *threads);
+
+ /*
+  * Start thrd_num worker threads running threadRoutine(); thread i receives
+  * &paras[i] as its argument.  The process exits with status 1 when a thread
+  * cannot be created.
+  *
+  * The loop counter is an int: the former unsigned char counter wrapped
+  * around at 256 and could never reach a larger thrd_num.
+  */
+ static void creatThrds(pthread_t *threads, PARAMETER *paras)
+ {
+     int i;
+
+     for(i = 0; i < thrd_num; i++)
+     {
+         if(pthread_create(&threads[i], NULL, (void *)threadRoutine, &(paras[i])) != 0)
+         {
+             printf("create threads failed\n");
+             exit(1);
+         }
+     }
+ }
+
+ /*
+  * Worker-thread main loop.  The thread polls its own signal byte
+  * (prm->selfSignal) and reacts to the value it finds there:
+  *   1 - look up the buffered kmers whose hash maps to this thread,
+  *   2 - chop this thread's share of the buffered reads into kmers,
+  *   3 - parse this thread's share of the buffered reads,
+  *   5 - leave the loop.
+  * The signal byte is reset to 0 once the requested work is done.
+  */
+ static void threadRoutine(void *para)
+ {
+     PARAMETER *self = (PARAMETER *)para;
+     unsigned char me = self->threadID;
+     int k;
+
+     for(;;)
+     {
+         if(*(self->selfSignal) == 1)
+         {
+             for(k = 0; k < kmer_c; k++)
+             {
+                 if((hashBanBuffer[k] % thrd_num) == me)
+                     searchKmer(k, KmerSets[me]);
+             }
+
+             *(self->selfSignal) = 0;
+         }
+         else if(*(self->selfSignal) == 2)
+         {
+             for(k = 0; k < read_c; k++)
+             {
+                 if(k % thrd_num == me)
+                     chopKmer4read(k, me + 1);
+             }
+
+             *(self->selfSignal) = 0;
+         }
+         else if(*(self->selfSignal) == 3)
+         {
+             for(k = 0; k < read_c; k++)
+             {
+                 if(k % thrd_num == me)
+                     parse1read(k, me + 1);
+             }
+
+             *(self->selfSignal) = 0;
+         }
+         else if(*(self->selfSignal) == 5)
+         {
+             *(self->selfSignal) = 0;
+             break;
+         }
+
+         usleep(1);
+     }
+ }
+/*
+static void chopReads()
+{
+ int i;
+ for(i=0;i<read_c;i++){
+ chopKmer4read(i,0);
+ }
+}
+*/
+ /*
+  * Chop buffered read `t` into overlaplen-mers; reads shorter than
+  * overlaplen + 1 are left alone.
+  *
+  * For every kmer the smaller of the forward word and its reverse complement
+  * is stored in kmerBuffer, its hash_kmer() value in hashBanBuffer, and
+  * smallerBuffer records whether the forward word was the smaller one (1) or
+  * not (0).  Output slots start at indexArray[t]; rcSeq[threadID] is used as
+  * scratch space for the reverse-complemented read.
+  */
+ static void chopKmer4read(int t, int threadID)
+ {
+     int seqLen = lenBuffer[t];
+
+     if(seqLen < overlaplen + 1)
+         return;
+
+     char *fwd = seqBuffer[t];
+     char *rc = rcSeq[threadID];
+     int slot, step, mirror;
+     Kmer word = 0, bal_word;
+
+     // pack the first overlaplen bases, two bits each
+     for(step = 0; step < overlaplen; step++)
+     {
+         word <<= 2;
+         word += fwd[step];
+     }
+
+     reverseComplementSeq(fwd, seqLen, rc);
+     bal_word = reverseComplement(word, overlaplen);
+     slot = indexArray[t];
+     step = 0;
+
+     for(;;)
+     {
+         int fwdIsSmaller = word < bal_word;
+         Kmer canonical = fwdIsSmaller ? word : bal_word;
+
+         kmerBuffer[slot] = canonical;
+         smallerBuffer[slot] = fwdIsSmaller ? 1 : 0;
+         hashBanBuffer[slot] = hash_kmer(canonical);
+         slot++;
+
+         step++;
+
+         if(!(step <= seqLen - overlaplen))
+             break;
+
+         // slide both strands by one base
+         word = nextKmer(word, fwd[step - 1 + overlaplen]);
+         mirror = seqLen - step - overlaplen;
+         bal_word = prevKmer(bal_word, rc[mirror]);
+     }
+ }
+
+ /*
+  * Look up buffered kmer `t` in `kset` and store the resulting node in
+  * nodeBuffer[t] (NULL when nothing usable is found).
+  *
+  * An exact hit is used unless the node is flagged deleted.  Without an exact
+  * hit the kmer is split into a spaced key (spc_kmer) and 7 spaced bases
+  * (spc_bases); the key is looked up in spcSet and, among the candidates
+  * chained under it, the one whose spaced bases differ least from the read is
+  * chosen.  The kset counters record which path was taken.
+  */
+ static void searchKmer(int t, KmerSet *kset)
+ {
+ kmer_t *node;
+
+ boolean found = search_kmerset(kset, kmerBuffer[t], &node);
+
+ ++kset->searchCnt;
+
+ if(found)
+ {
+ ++kset->foundCnt;
+
+ if(!node->deleted)
+ nodeBuffer[t] = node;
+ else
+ {
+ ++kset->delCnt;
+ nodeBuffer[t] = NULL;
+ }
+ }
+ else
+ {
+ ++kset->searchSpcSeedCnt;
+
+ boolean spcFlag;
+ Kmer buff_kmer, spc_kmer;
+ ubyte2 spc_bases;
+ spcKmer *rs;
+ spcBase *tmpBase;
+
+ buff_kmer = kmerBuffer[t];
+ // spc_kmer packs kmer bits 6-7, 12-15, 18-19 and 22-49; spc_bases packs the remaining low bits (0-5, 8-11, 16-17, 20-21)
+ spc_kmer = ((buff_kmer >> 14) & 0xFFFFFFF00) | ((buff_kmer >> 12) & 0xC0) | ((buff_kmer >> 10) & 0x3C) | ((buff_kmer >> 6) & 0x3);
+ spc_bases = ((buff_kmer >> 8) & 0x3000) | ((buff_kmer >> 6) & 0xC00) | ((buff_kmer >> 2) & 0x3C0) | (buff_kmer & 0x3F);
+
+ spcFlag = search_spckmerset(spcSet, spc_kmer, &rs);
+
+ if(spcFlag)
+ {
+ ++kset->getSpcSeedCnt;
+
+ int i = 0, j = 0, getFlag = -1; // getFlag: which rule picked the candidate last (0 count, 1 position, 2 probability)
+ int mismatch = 0;
+ ubyte2 tmp, mostLastBase; //loci flags
+ ubyte2 bestSpcBases; //best spaced bases
+ int min_mis = 31;
+ ubyte2 tmpSpcBase;
+
+ tmpBase = rs->start;
+
+ //fprintf(stderr,"search %llu\tspc_kmer %u\tspc_bases %u\n", kmerBuffer[t], spc_kmer, spc_bases);
+
+ while(tmpBase != NULL)
+ {
+ tmpSpcBase = tmpBase->spaced_bases;
+ // fold each 2-bit base of the XOR into one bit, so the bit count of tmp is the number of differing bases
+ tmp = ((spc_bases ^ tmpSpcBase) & 0x5555) | (((spc_bases ^ tmpSpcBase) & 0xAAAA) >> 1);
+ mismatch = binLight[tmp];
+
+ if(mismatch < min_mis) //get the minimal mismatch spaced_bases
+ {
+ min_mis = mismatch;
+ mostLastBase = tmp;
+ bestSpcBases = tmpSpcBase;
+ node = tmpBase->large_kmer;
+ getFlag = 0;
+ }
+ else if(mismatch == min_mis) //if same amount of mismatch, choose the most right mismatch pos
+ {
+ if(tmp < mostLastBase)
+ {
+ mostLastBase = tmp;
+ bestSpcBases = tmpSpcBase;
+ node = tmpBase->large_kmer;
+ getFlag = 1;
+ }
+ else if(tmp == mostLastBase) //if same mismatch pos, choose the most probable one[see probableMatrix]
+ {
+ /*
+ static ubyte probableMatrix[4][4] = {
+ //A C T G 7 3 2 1
+ 7, 2, 1, 3, // A->A G C T
+ 2, 7, 3, 1, // C->C T A G
+ 1, 3, 7, 2, // T->T C G A
+ 3, 1, 2, 7 // G->G A T C
+ };
+ */
+ getFlag = 2;
+ ubyte2 readBases = spc_bases, loopBases = tmpSpcBase, bestBases = bestSpcBases, mismatchFlag = tmp;
+
+ for(j = 0; j < 7; j++) // walk the 7 spaced bases from the lowest bits upwards
+ {
+ if((mismatchFlag & 0x3) > 0)
+ {
+ if(probableMatrix[(readBases & 0x3)][(loopBases & 0x3)] > probableMatrix[(readBases & 0x3)][(bestBases & 0x3)])
+ //check each 2 bits(1 base) if mismatch
+ {
+ mostLastBase = tmp;
+ bestSpcBases = tmpSpcBase;
+ node = tmpBase->large_kmer;
+ break;
+ }
+ else if((probableMatrix[(readBases & 0x3)][(loopBases & 0x3)] < probableMatrix[(readBases & 0x3)][(bestBases & 0x3)]))
+ break;
+ }
+
+ mismatchFlag >>= 2;
+ readBases >>= 2;
+ loopBases >>= 2;
+ bestBases >>= 2;
+ }
+ }
+ }
+
+ tmpBase = tmpBase->next;
+ }
+
+ if(getFlag < 0) // only possible when the candidate chain was empty
+ {
+ fprintf(stderr, "getFlag error at %llu", kmerBuffer[t]);
+ exit(-1);
+ }
+
+ ++kset->levelGet[getFlag];
+ nodeBuffer[t] = node;
+ }
+ else
+ nodeBuffer[t] = NULL;
+ }
+ }
+ /*
+  * Decide which contig buffered read `t` belongs to, from the nodes that
+  * searchKmer() stored in nodeBuffer for the read's kmers.
+  *
+  * Hits are grouped by contig ID (node->l_links).  The contig with the most
+  * hits wins, provided it has at least `multi` of them; the result goes to
+  * ctgIdArray[t], posArray[t] and orienArray[t].  ctgIdArray[t] is set to 0
+  * when the read has no kmers or no contig reaches the threshold.
+  * footprint[t] is set when more than one contig received at least two hits.
+  * deletion[threadID] counts reads with no kmers or fewer than two hits.
+  *
+  * Side effect: later nodeBuffer entries that point at an already counted
+  * contig are overwritten with NULL.
+  */
+ static void parse1read(int t, int threadID)
+ {
+ unsigned int j, i, s;
+ unsigned int contigID;
+ int counter2 = 0, counter;
+ unsigned int ctgLen, pos;
+ kmer_t *node;
+ boolean isSmaller;
+ int flag, maxOcc = 0;
+ kmer_t *maxNode = NULL;
+ int alldgnLen = lenBuffer[t] > ALIGNLEN ? ALIGNLEN : lenBuffer[t]; // read length capped at ALIGNLEN
+ int multi = alldgnLen - overlaplen + 1 < 5 ? 5 : alldgnLen - overlaplen + 1; // hits required on one contig, at least 5
+ unsigned int start, finish;
+
+ footprint[t] = 0;
+
+ start = indexArray[t];
+ finish = indexArray[t + 1];
+
+ if(finish == start) //too short
+ {
+ ctgIdArray[t] = 0;
+ deletion[threadID]++;
+ return;
+ }
+
+ for(j = start; j < finish; j++) // count the kmers that found a node
+ if(nodeBuffer[j])
+ counter2++;
+
+ if(counter2 < 2)
+ deletion[threadID]++;
+
+ counter = counter2 = 0;
+
+ for(j = start; j < finish; j++)
+ {
+ node = nodeBuffer[j];
+
+ if(!node) //same as previous
+ continue;
+
+ flag = 1; // number of hits on this node's contig, including this one
+
+ for(s = j + 1; s < finish; s++)
+ {
+ if(!nodeBuffer[s])
+ continue;
+
+ if(nodeBuffer[s]->l_links == node->l_links)
+ {
+ flag++;
+ nodeBuffer[s] = NULL;
+ }
+ }
+
+ if(flag >= 2)
+ counter2++; //a loose alignment
+
+ if(flag >= multi)
+ counter++;
+ else
+ continue;
+
+ if(flag > maxOcc)
+ {
+ pos = j; // for now the kmerBuffer slot of the first hit on the best contig
+ maxOcc = flag;
+ maxNode = node;
+ }
+ }
+
+ if(!counter) //no match
+ {
+ ctgIdArray[t] = 0;
+ return;
+ }
+
+ if(counter2 > 1)
+ footprint[t] = 1; //aligned to multi contigs
+
+ j = pos;
+ i = pos - start + 1; // 1-based ordinal of that kmer within the read
+ node = nodeBuffer[j];
+ isSmaller = smallerBuffer[j];
+ contigID = node->l_links;
+ ctgLen = contig_array[contigID].length;
+ pos = node->r_links; // from here on pos is the kmer position stored in the node
+
+ if(node->twin == isSmaller)
+ {
+ orienArray[t] = '-';
+ ctgIdArray[t] = getTwinCtg(contigID);
+ posArray[t] = ctgLen - pos - overlaplen - i + 1;
+ }
+ else
+ {
+ orienArray[t] = '+';
+ ctgIdArray[t] = contigID;
+ posArray[t] = pos - i + 1;
+ }
+
+ }
+
+ /*
+  * Post `SIG` into the signal byte of every worker (thrdSignals[1..thrd_num])
+  * and poll every 10 microseconds until all of those bytes read 0 again.
+  */
+ static void sendWorkSignal(unsigned char SIG, unsigned char *thrdSignals)
+ {
+     int slot;
+     int busy;
+
+     for(slot = 1; slot <= thrd_num; slot++)
+         thrdSignals[slot] = SIG;
+
+     do
+     {
+         usleep(10);
+         busy = 0;
+
+         for(slot = 1; slot <= thrd_num; slot++)
+         {
+             if(thrdSignals[slot])
+             {
+                 busy = 1;
+                 break;
+             }
+         }
+     }
+     while(busy);
+ }
+
+ /*
+  * Derive ctgIdArray[t] and posArray[t] for buffered read `t` from each kmer
+  * that has a node in nodeBuffer.  Every such kmer overwrites the previous
+  * result, so the last one in the read determines the final values.
+  */
+ static void locate1read(int t)
+ {
+     int first = indexArray[t];
+     int last = indexArray[t + 1];
+     int k;
+
+     for(k = first; k < last; k++)
+     {
+         kmer_t *hit = nodeBuffer[k];
+         unsigned int ctg;
+         int ordinal, ctgLen, where;
+
+         if(hit == NULL)
+             continue;
+
+         ordinal = k - first + 1; // 1-based ordinal of the kmer within the read
+         ctg = hit->l_links;
+         ctgLen = contig_array[ctg].length;
+         where = hit->r_links;
+
+         if(hit->twin == smallerBuffer[k])
+         {
+             ctgIdArray[t] = getTwinCtg(ctg);
+             posArray[t] = ctgLen - where - overlaplen - ordinal + 1;
+         }
+         else
+         {
+             ctgIdArray[t] = ctg;
+             posArray[t] = where - ordinal + 1;
+         }
+     }
+ }
+
+ /*
+  * Append buffered read `t` to `outfp` as a binary record: length, contig ID
+  * and position (one int each) followed by len / 4 + 1 bytes of the sequence
+  * packed with writeChar2tightString().  rcSeq[1] serves as the packing
+  * buffer, and readsInGap is incremented.
+  */
+ static void output1read(int t, FILE *outfp)
+ {
+     int len = lenBuffer[t];
+     int base = 0;
+
+     readsInGap++;
+
+     while(base < len)
+     {
+         writeChar2tightString(seqBuffer[t][base], rcSeq[1], base);
+         base++;
+     }
+
+     fwrite(&len, sizeof(int), 1, outfp);
+     fwrite(&ctgIdArray[t], sizeof(int), 1, outfp);
+     fwrite(&posArray[t], sizeof(int), 1, outfp);
+     fwrite(rcSeq[1], sizeof(char), len / 4 + 1, outfp);
+ }
+
+ /*
+  * Place read `t` from the location of its mate and write it to `outfp`.
+  *
+  * readOne != 0: `t` is read 1 and its mate is t + 1.
+  * readOne == 0: `t` is read 2 and its mate is t - 1.
+  * The read takes over the mate's contig ID, and its position becomes the
+  * mate's position plus insSize minus the read's own length.
+  */
+ static void getReadIngap(int t, int insSize, FILE *outfp, boolean readOne)
+ {
+     int mate = readOne ? t + 1 : t - 1;
+
+     ctgIdArray[t] = ctgIdArray[mate];
+     posArray[t] = posArray[mate] + insSize - lenBuffer[t];
+     output1read(t, outfp);
+ }
+
+ /*
+  * Count every buffered read in readCounter and write those whose footprint
+  * flag is set to `outfp`.
+  */
+ static void recordLongRead(FILE *outfp)
+ {
+     int idx = 0;
+
+     while(idx < read_c)
+     {
+         readCounter++;
+
+         if(footprint[idx])
+         {
+             output1read(idx, outfp);
+         }
+
+         idx++;
+     }
+ }
+
+ /*
+  * Report the placement of every buffered read.
+  *
+  * outfp   - text output; one "read, contig, position, orientation" line per
+  *           read that has a contig.
+  * insSize - insert size handed to getReadIngap().
+  * outfp2  - optional binary output for reads near gaps (may be NULL).
+  *
+  * Reads are treated as pairs (even index = read 1, odd index = read 2).
+  * When exactly one read of a pair has a contig, the other one is placed from
+  * its mate by getReadIngap().  For a placed read 2, both reads of the pair
+  * are also written to outfp2 when their footprint flag is set and they were
+  * not already written by getReadIngap().
+  */
+ static void recordAlldgn(FILE *outfp, int insSize, FILE *outfp2)
+ {
+     int t;
+
+     for(t = 0; t < read_c; t++)
+     {
+         boolean mate1Placed = 0, mate2Placed = 0;
+         int isRead2 = (t % 2 == 1);
+         // taken before getReadIngap() can overwrite ctgIdArray[t]
+         int ownCtg = ctgIdArray[t];
+
+         readCounter++;
+         single_count++;
+
+         if(outfp2 && isRead2)
+         {
+             if(ctgIdArray[t] < 1 && ctgIdArray[t - 1] > 0)
+             {
+                 getReadIngap(t, insSize, outfp2, 0); // read 2 in gap
+                 mate2Placed = 1;
+             }
+             else if(ctgIdArray[t] > 0 && ctgIdArray[t - 1] < 1)
+             {
+                 getReadIngap(t - 1, insSize, outfp2, 1); // read 1 in gap
+                 mate1Placed = 1;
+             }
+         }
+
+         if(ownCtg >= 1)
+         {
+             mapCounter++;
+             single_map++;
+             fprintf(outfp, "%lld\t%u\t%d\t%c\n", readCounter,
+                     ctgIdArray[t], posArray[t], orienArray[t]);
+
+             if(isRead2)
+             {
+                 if(outfp2 && footprint[t - 1] && !mate1Placed)
+                     output1read(t - 1, outfp2);
+
+                 if(outfp2 && footprint[t] && !mate2Placed)
+                     output1read(t, outfp2);
+             }
+         }
+     }
+ }
+
+ /*
+  * Load contig index and length from "<infile>.ContigIndex" into the global
+  * contig_array (allocated here with num_all + 1 entries) and set num_ctg.
+  *
+  * The first line carries two counts starting at column 8, the second line is
+  * skipped, and every following line holds "index length bal".  Each line
+  * fills one entry, plus a second entry for the twin when bal is non-zero.
+  *
+  * Compared with the unchecked original:
+  *  - a header that cannot be parsed ends the program instead of sizing the
+  *    array from uninitialized values;
+  *  - lines that do not parse are skipped instead of re-adding the values of
+  *    the previous line;
+  *  - the program stops with a message before an entry would be written
+  *    beyond the num_all slots announced by the header;
+  *  - the file name is built with snprintf().
+  */
+ void basicContigInfo(char *infile)
+ {
+     char name[256], lldne[1024] = {0};
+     FILE *fp;
+     int length, bal_ed, num_all, num_long, index;
+
+     snprintf(name, sizeof(name), "%s.ContigIndex", infile);
+     fp = ckopen(name, "r");
+
+     // lldne is zero-filled, so lldne + 8 is an empty string for a short line
+     if(fgets(lldne, sizeof(lldne), fp) == NULL
+             || sscanf(lldne + 8, "%d %d", &num_all, &num_long) != 2
+             || num_all < 0)
+     {
+         fprintf(stderr, "basicContigInfo: cannot parse the header of %s\n", name);
+         exit(1);
+     }
+
+     //printf("%d edges in graph\n",num_all);
+     num_ctg = num_all;
+     contig_array = (CONTIG *)ckalloc((num_all + 1) * sizeof(CONTIG));
+
+     // second line is skipped; a missing line simply ends the loop below
+     fgets(lldne, sizeof(lldne), fp);
+     num_long = 0;
+
+     while(fgets(lldne, sizeof(lldne), fp) != NULL)
+     {
+         if(sscanf(lldne, "%d %d %d", &index, &length, &bal_ed) != 3)
+             continue;
+
+         // one slot for the contig, one more for its twin when bal_ed != 0
+         if(num_long + (bal_ed == 0 ? 1 : 2) > num_all)
+         {
+             fprintf(stderr, "basicContigInfo: %s lists more contigs than the %d in its header\n",
+                     name, num_all);
+             exit(1);
+         }
+
+         contig_array[++num_long].length = length;
+         contig_array[num_long].bal_edge = bal_ed + 1;
+
+         if(index != num_long)
+             printf("basicContigInfo: %d vs %d\n", index, num_long);
+
+         if(bal_ed == 0)
+             continue;
+
+         contig_array[++num_long].length = length;
+         contig_array[num_long].bal_edge = -bal_ed + 1;
+     }
+
+     fclose(fp);
+ }
+ /*
+  * Map every read of the libraries described in `libfile` onto the contigs,
+  * using the kmer sets in KmerSets[0..thrd_num-1].
+  *
+  * Output files (all prefixed with `outfile`):
+  *   .readOnContig - one text line per read that was placed on a contig,
+  *   .readInGap    - binary records of reads near gaps (see output1read),
+  *   .peGrads      - one header line plus one line per entry of pes[].
+  *
+  * Reads are collected into batches of at most maxReadNum (an even number, so
+  * the two reads of a pair stay together).  For each batch the workers are
+  * signalled to chop (2), search (1) and parse (3) before recordAlldgn()
+  * writes the results.  contig_array is loaded on demand and freed on exit.
+  */
+ void prlRead2Ctg(char *libfile, char *outfile)
+ {
+ long long i;
+ char *src_name, *next_name, name[256];
+ FILE *fo, *outfp2 = NULL;
+ int maxReadNum, libNo, prevLibNo, insSize;
+ boolean flag, pairs = 1;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+
+ maxReadLen = 0;
+ maxNameLen = 256;
+ scan_libInfo(libfile);
+ alloc_pe_mem(num_libs);
+
+ if(!maxReadLen)
+ maxReadLen = 100;
+
+ //printf("In file: %s, max seq len %d, max name len %d\n\n",
+ //libfile,maxReadLen,maxNameLen);
+ if(maxReadLen > maxReadLen4all)
+ maxReadLen4all = maxReadLen;
+
+ //////////////////////////////////////////// spcSet
+ fflush(stdout);
+
+ ubyte2 spc_i, spc_j;
+
+ for(spc_i = 0; spc_i < 16384; spc_i++) // fill the bit-count table used by searchKmer()
+ {
+ binLight[spc_i] = 0;
+
+ for(spc_j = spc_i; spc_j; spc_j = spc_j & (spc_j - 1)) // clears the lowest set bit on every pass
+ ++binLight[spc_i];
+ }
+
+ spcSet = init_spckmerset(KmerSets[thrd_num - 1]->size * thrd_num, 0.77f);
+
+
+ for(i = 0; i < thrd_num; i++)
+ {
+ buildSpcKmerSet(KmerSets[i], spcSet);
+ mvnv(0, "%lldth spaced bases set build complete.", i);
+ }
+
+ //////////////////////////////////////////// END spcSet
+
+ src_name = (char *)ckalloc((maxNameLen + 1) * sizeof(char));
+ next_name = (char *)ckalloc((maxNameLen + 1) * sizeof(char));
+
+ kmerBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+ hashBanBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+ nodeBuffer = (kmer_t **)ckalloc(buffer_size * sizeof(kmer_t *));
+ smallerBuffer = (boolean *)ckalloc(buffer_size * sizeof(boolean));
+
+ maxReadNum = buffer_size / (maxReadLen - overlaplen + 1); // a read of maxReadLen yields maxReadLen - overlaplen + 1 kmers
+ maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; //make sure paired reads are processed at the same batch
+ seqBuffer = (char **)ckalloc(maxReadNum * sizeof(char *));
+ lenBuffer = (int *)ckalloc(maxReadNum * sizeof(int));
+ indexArray = (unsigned int *)ckalloc((maxReadNum + 1) * sizeof(unsigned int));
+ ctgIdArray = (unsigned int *)ckalloc((maxReadNum + 1) * sizeof(unsigned int));
+ posArray = (int *)ckalloc((maxReadNum + 1) * sizeof(int));
+ orienArray = (char *)ckalloc((maxReadNum + 1) * sizeof(char));
+ footprint = (char *)ckalloc((maxReadNum + 1) * sizeof(char));
+
+ for(i = 0; i < maxReadNum; i++)
+ seqBuffer[i] = (char *)ckalloc(maxReadLen * sizeof(char));
+
+ rcSeq = (char **)ckalloc((thrd_num + 1) * sizeof(char *));
+ deletion = (int *)ckalloc((thrd_num + 1) * sizeof(int));
+ thrdSignal[0] = 0;
+ deletion[0] = 0;
+
+ if(1)
+ {
+ for(i = 0; i < thrd_num; i++)
+ {
+ rcSeq[i + 1] = (char *)ckalloc(maxReadLen * sizeof(char));
+ deletion[i + 1] = 0;
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds(threads, paras);
+ }
+
+ if(!contig_array)
+ basicContigInfo(outfile);
+
+ sprintf(name, "%s.readInGap", outfile);
+ outfp2 = ckopen(name, "wb");
+
+ sprintf(name, "%s.readOnContig", outfile);
+ fo = ckopen(name, "w");
+ fprintf(fo, "read\tcontig\tpos\n");
+ readCounter = mapCounter = 0;
+ single_map = single_count = 0;
+ kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = readsInGap = 0;
+ prevLibNo = -1;
+
+ //mvnv(0,"Start loading reads.");
+
+ while((flag = read1seqInLib(seqBuffer[read_c], next_name, &(lenBuffer[read_c]), &libNo, pairs, 0)) != 0)
+ {
+ if(libNo != prevLibNo) // first read of a new library: pick up its insert size and map length
+ {
+ prevLibNo = libNo;
+ insSize = lib_array[libNo].avg_ins;
+ ALIGNLEN = lib_array[libNo].map_len;
+
+ if(insSize > 1000)
+ ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
+ else
+ ALIGNLEN = ALIGNLEN < 32 ? 32 : ALIGNLEN;
+
+ //printf("current insert size %d, map_len %d\n",insSize,ALIGNLEN);
+
+ }
+
+ if(insSize > 1000)
+ ALIGNLEN = ALIGNLEN < (lenBuffer[read_c] / 2 + 1) ? (lenBuffer[read_c] / 2 + 1) : ALIGNLEN;
+
+ // if((++i)%100000000==0)
+ // printf("[%s]%lld reads processed.\n",__FUNCTION__,i);
+ indexArray[read_c] = kmer_c;
+
+ if(lenBuffer[read_c] >= overlaplen + 1) // shorter reads contribute no kmers
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+
+ read_c++;
+
+ if(read_c == maxReadNum) // batch full: process it and start over
+ {
+ //mvnv(0,"Start processing reads.");
+
+ indexArray[read_c] = kmer_c;
+
+ sendWorkSignal(2, thrdSignal);
+ //mvnv(0,"chop finished one buffer.");
+ sendWorkSignal(1, thrdSignal);
+ //mvnv(0,"search finished one buffer.");
+ sendWorkSignal(3, thrdSignal);
+ //mvnv(0,"parse finished one buffer.");
+
+ recordAlldgn(fo, insSize, outfp2);
+ kmer_c = 0;
+ read_c = 0;
+ }
+ }
+
+ if(read_c) // process the last, partly filled batch
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal(2, thrdSignal);
+ sendWorkSignal(1, thrdSignal);
+ sendWorkSignal(3, thrdSignal);
+ recordAlldgn(fo, insSize, outfp2);
+ //printf("Output %lld out of %lld (%.1f)%% reads in gaps\n",readsInGap,readCounter,
+ // (float)readsInGap/readCounter*100);
+ }
+
+ if(readCounter)
+ printf("[%s]total %llu reads , map-rate (%.1f)%%\n", __FUNCTION__,
+ readCounter, (float)mapCounter / readCounter * 100);
+
+ sendWorkSignal(5, thrdSignal); // 5 makes the workers leave their loop
+
+ thread_wait(threads);
+ fclose(fo);
+
+ sprintf(name, "%s.peGrads", outfile);
+ fo = ckopen(name, "w");
+ fprintf(fo, "grads&num: %d\t%lld\t%d\n", gradsCounter, n_solexa, maxReadLen4all);
+
+ if(pairs)
+ {
+ if(gradsCounter)
+ ;
+ //printf("%d pe insert size, the largest boundary is %lld\n\n",
+ //gradsCounter,pes[gradsCounter-1].PE_bound);
+ else
+ printf("no paired reads found\n");
+
+ for(i = 0; i < gradsCounter; i++)
+ fprintf(fo, "%d\t%lld\t%d\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank, pes[i].pair_num_cut);
+ }
+
+ fclose(fo);
+
+ fclose(outfp2);
+
+ free_pe_mem();
+ free_libs();
+
+ if(1) // multi-threads
+ {
+ for(i = 0; i < thrd_num; i++)
+ {
+ deletion[0] += deletion[i + 1];
+ free((void *)rcSeq[i + 1]);
+ }
+ }
+
+ //printf("%d reads deleted\n",deletion[0]);
+
+ ubyte8 searchCntTot = 0, foundCntTot = 0, delCntTot = 0,
+ searchSpcSeedCntTot = 0, getSpcSeedCntTot = 0, levelGet1 = 0, levelGet2 = 0, levelGet3 = 0;
+
+ for(i = 0; i < thrd_num; i++) // add up the per-set counters maintained by searchKmer()
+ {
+ searchCntTot += KmerSets[i]->searchCnt;
+ foundCntTot += KmerSets[i]->foundCnt;
+ delCntTot += KmerSets[i]->delCnt;
+ searchSpcSeedCntTot += KmerSets[i]->searchSpcSeedCnt;
+ getSpcSeedCntTot += KmerSets[i]->getSpcSeedCnt;
+ levelGet1 += KmerSets[i]->levelGet[0];
+ levelGet2 += KmerSets[i]->levelGet[1];
+ levelGet3 += KmerSets[i]->levelGet[2];
+ }
+
+ fprintf(stderr, "SEARCH: Search %llu, get %llu, deleted %llu\n",
+ searchCntTot, foundCntTot, delCntTot);
+ fprintf(stderr, "SPACED SEED: Search %llu, get %llu, LVnum %llu, LVpos %llu, LVpro %llu\n",
+ searchSpcSeedCntTot, getSpcSeedCntTot, levelGet1, levelGet2, levelGet3);
+
+ free((void *)rcSeq);
+ free((void *)deletion);
+
+ for(i = 0; i < maxReadNum; i++)
+ free((void *)seqBuffer[i]);
+
+ free((void *)seqBuffer);
+ free((void *)lenBuffer);
+ free((void *)indexArray);
+
+ free((void *)kmerBuffer);
+ free((void *)smallerBuffer);
+ free((void *)hashBanBuffer);
+ free((void *)nodeBuffer);
+ free((void *)ctgIdArray);
+ free((void *)posArray);
+ free((void *)orienArray);
+ free((void *)footprint);
+
+ free((void *)src_name);
+ free((void *)next_name);
+
+ if(contig_array)
+ {
+ free((void *)contig_array);
+ contig_array = NULL;
+ }
+ }
+
+ /* Join each of the thrd_num worker threads whose handle is non-zero. */
+ static void thread_wait(pthread_t *threads)
+ {
+     int n = 0;
+
+     while(n < thrd_num)
+     {
+         if(threads[n] != 0)
+         {
+             pthread_join(threads[n], NULL);
+         }
+
+         n++;
+     }
+ }
+
+ /********************* map long reads for gap filling ************************/
+ /*
+  * Map the long reads of the libraries described in `libfile` onto the
+  * contigs and write those whose footprint flag is set by parse1read() to
+  * "<outfile>.longReadInGap" (binary records, see output1read).
+  *
+  * Returns right after scanning the libraries when getMaxLongReadLen()
+  * reports no long reads (< 1).
+  * NOTE(review): that early return skips free_libs(), which the normal path
+  * calls -- confirm whether the library table should be released there too.
+  *
+  * Reads are processed in batches of at most maxReadNum; for each batch the
+  * workers are signalled to chop (2), search (1) and parse (3).
+  *
+  * Fix: the batch counters are now reset to 0.  The original statement ended
+  * in "= libNo" and so copied the uninitialized libNo into kmer_c, n_solexa,
+  * read_c and i before the first read was stored at seqBuffer[read_c].
+  */
+ void prlLongRead2Ctg(char *libfile, char *outfile)
+ {
+     long long i;
+     char *next_name, name[256];
+     FILE *outfp2;
+     int maxReadNum, libNo, prevLibNo;
+     boolean flag, pairs = 0;
+     pthread_t threads[thrd_num];
+     unsigned char thrdSignal[thrd_num + 1];
+     PARAMETER paras[thrd_num];
+
+     maxReadLen = 0;
+     maxNameLen = 256;
+     scan_libInfo(libfile);
+
+     if(!maxReadLen)
+         maxReadLen = 100;
+
+     int longReadLen = getMaxLongReadLen(num_libs);
+
+     if(longReadLen < 1) // no long reads
+         return;
+
+     maxReadLen4all = maxReadLen < longReadLen ? longReadLen : maxReadLen;
+     maxReadLen = longReadLen;
+
+     next_name = (char *)ckalloc((maxNameLen + 1) * sizeof(char));
+
+     kmerBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+     hashBanBuffer = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+     nodeBuffer = (kmer_t **)ckalloc(buffer_size * sizeof(kmer_t *));
+     smallerBuffer = (boolean *)ckalloc(buffer_size * sizeof(boolean));
+
+     // a read of maxReadLen yields maxReadLen - overlaplen + 1 kmers
+     maxReadNum = buffer_size / (maxReadLen - overlaplen + 1);
+     maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; // keep the batch size even
+     seqBuffer = (char **)ckalloc(maxReadNum * sizeof(char *));
+     lenBuffer = (int *)ckalloc(maxReadNum * sizeof(int));
+     indexArray = (unsigned int *)ckalloc((maxReadNum + 1) * sizeof(unsigned int));
+     ctgIdArray = (unsigned int *)ckalloc((maxReadNum + 1) * sizeof(unsigned int));
+     posArray = (int *)ckalloc((maxReadNum + 1) * sizeof(int));
+     orienArray = (char *)ckalloc((maxReadNum + 1) * sizeof(char));
+     footprint = (char *)ckalloc((maxReadNum + 1) * sizeof(char));
+
+     for(i = 0; i < maxReadNum; i++)
+         seqBuffer[i] = (char *)ckalloc(maxReadLen * sizeof(char));
+
+     rcSeq = (char **)ckalloc((thrd_num + 1) * sizeof(char *));
+     deletion = (int *)ckalloc((thrd_num + 1) * sizeof(int));
+     thrdSignal[0] = 0;
+     deletion[0] = 0;
+
+     for(i = 0; i < thrd_num; i++)
+     {
+         rcSeq[i + 1] = (char *)ckalloc(maxReadLen * sizeof(char));
+         deletion[i + 1] = 0;
+         thrdSignal[i + 1] = 0;
+         paras[i].threadID = i;
+         paras[i].mainSignal = &thrdSignal[0];
+         paras[i].selfSignal = &thrdSignal[i + 1];
+     }
+
+     creatThrds(threads, paras);
+
+     if(!contig_array)
+         basicContigInfo(outfile);
+
+     snprintf(name, sizeof(name), "%s.longReadInGap", outfile);
+     outfp2 = ckopen(name, "wb");
+
+     readCounter = 0;
+     kmer_c = n_solexa = read_c = i = libNo = 0;
+     prevLibNo = -1;
+
+     while((flag = read1seqInLib(seqBuffer[read_c], next_name, &(lenBuffer[read_c]), &libNo, pairs, 4)) != 0)
+     {
+         if(libNo != prevLibNo)
+         {
+             // first read of a new library: pick up its map length
+             prevLibNo = libNo;
+             ALIGNLEN = lib_array[libNo].map_len;
+             ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
+         }
+
+         indexArray[read_c] = kmer_c;
+
+         // shorter reads contribute no kmers
+         if(lenBuffer[read_c] >= overlaplen + 1)
+             kmer_c += lenBuffer[read_c] - overlaplen + 1;
+
+         read_c++;
+
+         if(read_c == maxReadNum)
+         {
+             indexArray[read_c] = kmer_c;
+
+             sendWorkSignal(2, thrdSignal);
+             sendWorkSignal(1, thrdSignal);
+             sendWorkSignal(3, thrdSignal);
+
+             recordLongRead(outfp2);
+             kmer_c = 0;
+             read_c = 0;
+         }
+     }
+
+     // process the last, partly filled batch
+     if(read_c)
+     {
+         indexArray[read_c] = kmer_c;
+         sendWorkSignal(2, thrdSignal);
+         sendWorkSignal(1, thrdSignal);
+         sendWorkSignal(3, thrdSignal);
+         recordLongRead(outfp2);
+     }
+
+     sendWorkSignal(5, thrdSignal); // 5 makes the workers leave their loop
+
+     thread_wait(threads);
+
+     fclose(outfp2);
+
+     free_libs();
+
+     for(i = 0; i < thrd_num; i++)
+     {
+         deletion[0] += deletion[i + 1];
+         free((void *)rcSeq[i + 1]);
+     }
+
+     free((void *)rcSeq);
+     free((void *)deletion);
+
+     for(i = 0; i < maxReadNum; i++)
+         free((void *)seqBuffer[i]);
+
+     free((void *)seqBuffer);
+     free((void *)lenBuffer);
+     free((void *)indexArray);
+
+     free((void *)kmerBuffer);
+     free((void *)smallerBuffer);
+     free((void *)hashBanBuffer);
+     free((void *)nodeBuffer);
+     free((void *)ctgIdArray);
+     free((void *)posArray);
+     free((void *)orienArray);
+     free((void *)footprint);
+
+     free((void *)next_name);
+ }
+
diff --git a/fusion/prlReadFillGap.c b/fusion/prlReadFillGap.c
new file mode 100644
index 0000000..7bce97d
--- /dev/null
+++ b/fusion/prlReadFillGap.c
@@ -0,0 +1,1234 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+// First argument passed to createStack() when a contig's closeReads stack is
+// created (presumably its block / initial size -- confirm in stack.c).
+#define RDBLOCKSIZE 50
+// Upper bound on the flanking contig bases output1gap() writes on each side of a gap.
+#define CTGappend 50
+
+// Kmer with all 2*overlaplen low bits set; assigned from maxKmer() in
+// prlReadsCloseGap() and used as the starting mask in contigCatch().
+static Kmer MAXKMER;
+// Incremented by outputScafSeq() each time a run of N is written between two contigs.
+static int Ncounter;
+// Gap tally maintained by prlReadsCloseGap() while it parses contig lines.
+static int allGaps;
+
+// for multi threads
+// counters[t]: number of scaffolds claimed by thread t (incremented in check1scaf()).
+static int *counters;
+// Serialises the claim of a buffer slot in check1scaf().
+static pthread_mutex_t mutex;
+// Number of scaffold slots in flagBuf / thrdNoBuf / ctgStackBuffer.
+static int scafBufSize = 100;
+// flagBuf[s] is non-zero when slot s is empty or already claimed, 0 when it waits for a worker.
+static boolean *flagBuf;
+// thrdNoBuf[s]: ID of the thread that claimed slot s (read back by outputSeqs()).
+static unsigned char *thrdNoBuf;
+// ctgStackBuffer[s]: contig stack of the scaffold held in slot s.
+static STACK **ctgStackBuffer;
+// Number of scaffolds already flushed from the buffer.
+static int scafCounter;
+// Number of scaffolds currently held in the buffer.
+static int scafInBuf;
+
+// Defined further down in this file; declared here because procGap() calls it first.
+static void MarkCtgOccu(unsigned int ctg);
+/*
+static void printRead(int len,char *seq)
+{
+ int j;
+ fprintf(stderr,">read\n");
+ for(j=0;j<len;j++)
+ fprintf(stderr,"%c",int2base((int)getCharInTightString(seq,j)));
+ fprintf(stderr,"\n");
+}
+*/
+/*
+ * Record one read next to a contig.
+ *
+ * ctgID is translated through index_array; if the resulting contig is the
+ * larger one of a twin pair, the read is stored on the twin instead and its
+ * position is mirrored onto that strand. The closeReads stack of the target
+ * contig is created on first use.
+ *
+ *   len     - read length
+ *   pos     - read position relative to the contig
+ *   starter - offset of the packed read sequence (stored as seqStarter)
+ */
+static void attach1read2contig(unsigned int ctgID, int len, int pos, long long starter)
+{
+    unsigned int target = index_array[ctgID]; // new index in contig array
+    READNEARBY *entry;
+
+    if(isLargerThanTwin(target))
+    {
+        // keep all reads of a twin pair on one contig
+        target = getTwinCtg(target);
+        pos = contig_array[target].length + overlaplen - pos - len;
+    }
+
+    if(contig_array[target].closeReads == NULL)
+        contig_array[target].closeReads = (STACK *)createStack(RDBLOCKSIZE, sizeof(READNEARBY));
+
+    entry = (READNEARBY *)stackPush(contig_array[target].closeReads);
+    entry->seqStarter = starter;
+    entry->dis = pos;
+    entry->len = len;
+}
+
+/*
+ * Replace index_array by its inverse mapping over 1..num_ctg: for every i
+ * with index_array[i] = j > 0, index_array[j] becomes i afterwards. Slots
+ * that no entry maps to become 0.
+ */
+static void convertIndex()
+{
+    int *inverse = (int *)ckalloc((num_ctg + 1) * sizeof(int));
+    unsigned int k;
+
+    k = 1;
+
+    while(k <= num_ctg)
+        inverse[k++] = 0;
+
+    // build the inverse in scratch space; index_array is still being read here
+    k = 1;
+
+    while(k <= num_ctg)
+    {
+        if(index_array[k] > 0)
+            inverse[index_array[k]] = k;
+
+        k++;
+    }
+
+    // copy back: contig i with new index index_array[i]
+    k = 1;
+
+    while(k <= num_ctg)
+    {
+        index_array[k] = inverse[k];
+        k++;
+    }
+
+    free((void *)inverse);
+}
+
+/*
+ * Load read records from an open binary file into readSeqInGap and attach
+ * each read to its contig.
+ *
+ * Each record is: int len, int ctgID, int pos, followed by len / 4 + 1 bytes
+ * of packed sequence. Loading stops quietly at the first short read, at a
+ * record whose length does not fit the read buffer, or when the dynamic
+ * array cannot provide room.
+ *
+ * Returns the number of reads loaded; 0 when fp is NULL.
+ */
+static long long getRead1by1(FILE *fp, DARRAY *readSeqInGap)
+{
+    long long readCounter = 0;
+    long long starter;
+    int len, ctgID, pos;
+    size_t packedLen;
+    char *pt;
+    char *freadBuf;
+
+    if(!fp)
+        return readCounter;
+
+    freadBuf = (char *)ckalloc((maxReadLen / 4 + 1) * sizeof(char));
+
+    while(fread(&len, sizeof(int), 1, fp) == 1)
+    {
+        if(fread(&ctgID, sizeof(int), 1, fp) != 1)
+            break;
+
+        if(fread(&pos, sizeof(int), 1, fp) != 1)
+            break;
+
+        // len comes from the file: a negative or oversized value would make
+        // the fread below write past freadBuf (maxReadLen / 4 + 1 bytes)
+        if(len < 0 || len > maxReadLen)
+        {
+            fprintf(stderr, "getRead1by1: read length %d out of range, stop loading\n", len);
+            break;
+        }
+
+        packedLen = (size_t)len / 4 + 1;
+
+        if(fread(freadBuf, sizeof(char), packedLen, fp) != packedLen)
+            break;
+
+        //put seq to dynamic array
+        starter = readSeqInGap->item_c;
+
+        if(!darrayPut(readSeqInGap, starter + len / 4)) // make sure there's room for this seq
+            break;
+
+        pt = (char *)darrayPut(readSeqInGap, starter);
+        memcpy(pt, freadBuf, packedLen);
+        attach1read2contig(ctgID, len, pos, starter);
+        readCounter++;
+    }
+
+    free((void *)freadBuf);
+    return readCounter;
+}
+// Darray *readSeqInGap
+/*
+ * Load the reads used for gap filling from "<graphfile>.readInGap" and
+ * "<graphfile>.longReadInGap" into the global readSeqInGap array.
+ *
+ * Either file may be missing. index_array is converted first if that has
+ * not been done yet (orig2new == 0).
+ *
+ * Returns 1 when at least one of the two files could be opened, 0 otherwise.
+ * A file name that does not fit the local buffer is treated as a missing file.
+ */
+static boolean loadReads4gap(char *graphfile)
+{
+    FILE *fp, *fp2;
+    char name[1024];
+    int n;
+
+    // bounded formatting: graphfile is caller supplied and may be long
+    n = snprintf(name, sizeof(name), "%s.readInGap", graphfile);
+    fp = (n > 0 && (size_t)n < sizeof(name)) ? fopen(name, "rb") : NULL;
+    n = snprintf(name, sizeof(name), "%s.longReadInGap", graphfile);
+    fp2 = (n > 0 && (size_t)n < sizeof(name)) ? fopen(name, "rb") : NULL;
+
+    if(!fp && !fp2)
+        return 0;
+
+    if(!orig2new)
+    {
+        convertIndex();
+        orig2new = 1;
+    }
+
+    readSeqInGap = (DARRAY *)createDarray(1000000, sizeof(char));
+
+    // the per-file read counts are not reported anywhere, so they are dropped
+    if(fp)
+    {
+        (void)getRead1by1(fp, readSeqInGap);
+        fclose(fp);
+    }
+
+    if(fp2)
+    {
+        (void)getRead1by1(fp2, readSeqInGap);
+        fclose(fp2);
+    }
+
+    return 1;
+}
+
+/*
+ * Debug helper: print every read attached to the contig whose original
+ * index is 735 (hard-coded), one read per line as
+ * "dis<TAB>len<TAB>seqStarter<TAB>sequence".
+ *
+ * If index_array currently maps original -> new (orig2new set), it is
+ * inverted first so that index_array[i] is the original index of contig i,
+ * and orig2new is cleared.
+ */
+static void debugging1()
+{
+    unsigned int i;
+    READNEARBY *rd;
+    int j;
+    char *pt;
+
+    if(orig2new)
+    {
+        unsigned int *length_array = (unsigned int *)ckalloc((num_ctg + 1) * sizeof(unsigned int));
+
+        //use length_array to change info in index_array
+        for(i = 1; i <= num_ctg; i++)
+            length_array[i] = 0;
+
+        for(i = 1; i <= num_ctg; i++)
+        {
+            if(index_array[i] > 0)
+                length_array[index_array[i]] = i;
+        }
+
+        for(i = 1; i <= num_ctg; i++)
+            index_array[i] = length_array[i]; //contig i with original index: index_array[i]
+
+        // scratch table is no longer needed (it was leaked before)
+        free((void *)length_array);
+        orig2new = 0;
+    }
+
+    for(i = 1; i <= num_ctg; i++)
+    {
+        if(!contig_array[i].closeReads)
+            continue;
+
+        if(index_array[i] != 735)
+            continue;
+
+        // pop through the read stack and restore it afterwards
+        stackBackup(contig_array[i].closeReads);
+
+        while((rd = (READNEARBY *)stackPop(contig_array[i].closeReads)) != NULL)
+        {
+            printf("%d\t%d\t%lld\t", rd->dis, rd->len, rd->seqStarter);
+            pt = (char *)darrayGet(readSeqInGap, rd->seqStarter);
+
+            for(j = 0; j < rd->len; j++)
+                printf("%c", int2base((int)getCharInTightString(pt, j)));
+
+            printf("\n");
+        }
+
+        stackRecover(contig_array[i].closeReads);
+    }
+}
+
+/*
+ * Reset the trimming / gap fields of a scaffold entry: no gap sequence,
+ * nothing cut from the tail, overlaplen bases cut from the head.
+ */
+static void initiateCtgInScaf(CTGinSCAF *actg)
+{
+    actg->gapSeqLen = 0;
+    actg->cutHead = overlaplen;
+    actg->cutTail = 0;
+}
+
+/*
+ * Parse one gap line of the form "<tag> <length> <seg> <ctg1> ... <ctgN>"
+ * (space separated; line is modified by strtok).
+ *
+ * Every contig listed after <seg> is marked as used and pushed onto
+ * ctgsStack as a masked entry with start/end 0.
+ *
+ * Returns the gap length; 0 when the length field is missing. A line that
+ * ends early is processed up to the last token present instead of passing
+ * a NULL token to atoi().
+ */
+static int procGap(char *line, STACK *ctgsStack)
+{
+    char *tp;
+    int length, i, seg;
+    unsigned int ctg;
+    CTGinSCAF *ctgPt;
+
+    tp = strtok(line, " ");     // leading tag, value unused
+
+    if(!tp)
+        return 0;
+
+    tp = strtok(NULL, " ");     //length
+
+    if(!tp)
+        return 0;
+
+    length = atoi(tp);
+    tp = strtok(NULL, " ");     //seg
+
+    if(!tp)
+        return length;
+
+    seg = atoi(tp);
+
+    for(i = 0; i < seg; i++)
+    {
+        tp = strtok(NULL, " ");
+
+        if(!tp)                 // fewer contigs than announced
+            break;
+
+        ctg = atoi(tp);
+        MarkCtgOccu(ctg);
+        ctgPt = (CTGinSCAF *)stackPush(ctgsStack);
+        initiateCtgInScaf(ctgPt);
+        ctgPt->ctgID = ctg;
+        ctgPt->start = 0;
+        ctgPt->end = 0;
+        ctgPt->scaftig_start = 0;
+        ctgPt->mask = 1;
+    }
+
+    return length;
+}
+
+/*
+ * Debug helper: print a scaffold header followed by one line per contig
+ * (ctgID, start, end, scaftig_start). The stack is backed up before and
+ * recovered after the traversal.
+ */
+static void debugging2(int index, STACK *ctgsStack)
+{
+    CTGinSCAF *entry;
+
+    stackBackup(ctgsStack);
+    printf(">scaffold%d\t%d 0.0\n", index, ctgsStack->item_c);
+
+    for(entry = stackPop(ctgsStack); entry != NULL; entry = stackPop(ctgsStack))
+        printf("%d\t%d\t%d\t%d\n",
+               entry->ctgID, entry->start, entry->end, entry->scaftig_start);
+
+    stackRecover(ctgsStack);
+}
+
+/*
+ * qsort() comparator: orders READNEARBY records by ascending dis.
+ * Returns -1, 0 or 1.
+ */
+static int cmp_reads(const void *a, const void *b)
+{
+    READNEARBY *lhs = (READNEARBY *)a;
+    READNEARBY *rhs = (READNEARBY *)b;
+
+    if(lhs->dis > rhs->dis)
+        return 1;
+
+    return (lhs->dis == rhs->dis) ? 0 : -1;
+}
+
+/*
+ * Copy into cutArray the reads of rdArray that reach into [gapStart, gapEnd].
+ *
+ * rdArray is scanned from the front and the scan stops at the first read
+ * whose dis lies beyond gapEnd (fill1scaf() sorts the array by dis before
+ * calling). A read is kept when dis + len >= gapStart. The number of reads
+ * kept is stored in *count.
+ */
+static void cutRdArray(READNEARBY *rdArray, int gapStart, int gapEnd, int *count, int arrayLen, READNEARBY *cutArray)
+{
+    int src = 0;
+    int kept = 0;
+
+    while(src < arrayLen)
+    {
+        if(rdArray[src].dis > gapEnd)
+            break;
+
+        if((rdArray[src].dis + rdArray[src].len) >= gapStart)
+        {
+            cutArray[kept].dis = rdArray[src].dis;
+            cutArray[kept].len = rdArray[src].len;
+            cutArray[kept].seqStarter = rdArray[src].seqStarter;
+            kept++;
+        }
+
+        src++;
+    }
+
+    *count = kept;
+}
+
+/*
+ * Write up to outputlen bases of a packed sequence to fp.
+ *
+ *   start     - number of bases to skip (counted from the end when revS)
+ *   length    - total number of bases in tightStr
+ *   revS      - 0: write bases start.. forward with int2base();
+ *               non-zero: walk backwards from length - 1 - start and write
+ *               each base through int2compbase()
+ *   col       - running column counter; a newline is written whenever it
+ *               reaches a multiple of 100, and the updated value is stored back
+ */
+static void outputTightStr(FILE *fp, char *tightStr, int start, int length, int outputlen, int revS, int *col)
+{
+    int written = *col;
+    int pos, stop;
+
+    if(revS)
+    {
+        // lowest index to write, clamped at the first base
+        stop = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+        pos = length - 1 - start;
+
+        while(pos >= stop)
+        {
+            fprintf(fp, "%c", int2compbase((int)getCharInTightString(tightStr, pos)));
+
+            if((++written) % 100 == 0)
+                fprintf(fp, "\n");
+
+            pos--;
+        }
+    }
+    else
+    {
+        // one past the last index to write, clamped at the sequence end
+        stop = start + outputlen <= length ? start + outputlen : length;
+        pos = start;
+
+        while(pos < stop)
+        {
+            fprintf(fp, "%c", int2base((int)getCharInTightString(tightStr, pos)));
+
+            if((++written) % 100 == 0)
+                fprintf(fp, "\n");
+
+            pos++;
+        }
+    }
+
+    *col = written;
+}
+
+/*
+ * Lower-case variant of outputTightStr(): writes up to outputlen bases of a
+ * packed sequence to fp, using "actg" for the forward direction and "tgac"
+ * when walking backwards (revS non-zero).
+ *
+ * col is the running column counter; a newline is written whenever it
+ * reaches a multiple of 100, and the updated value is stored back.
+ */
+static void outputTightStrLowerCase(FILE *fp, char *tightStr, int start, int length, int outputlen, int revS, int *col)
+{
+    int written = *col;
+    int pos, stop;
+
+    if(revS)
+    {
+        // lowest index to write, clamped at the first base
+        stop = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+        pos = length - 1 - start;
+
+        while(pos >= stop)
+        {
+            fprintf(fp, "%c", "tgac"[(int)getCharInTightString(tightStr, pos)]);
+
+            if((++written) % 100 == 0)
+                fprintf(fp, "\n");
+
+            pos--;
+        }
+    }
+    else
+    {
+        // one past the last index to write, clamped at the sequence end
+        stop = start + outputlen <= length ? start + outputlen : length;
+        pos = start;
+
+        while(pos < stop)
+        {
+            fprintf(fp, "%c", "actg"[(int)getCharInTightString(tightStr, pos)]);
+
+            if((++written) % 100 == 0)
+                fprintf(fp, "\n");
+
+            pos++;
+        }
+    }
+
+    *col = written;
+}
+
+/*
+ * Write gapN 'N' characters to fp. col is the running column counter: a
+ * newline is written whenever it reaches a multiple of 100, and the updated
+ * value is stored back. Nothing is written when gapN <= 0.
+ */
+static void outputNs(FILE *fp, int gapN, int *col)
+{
+    int written = *col;
+    int remaining;
+
+    for(remaining = gapN; remaining > 0; remaining--)
+    {
+        fprintf(fp, "N");
+        written++;
+
+        if(written % 100 == 0)
+            fprintf(fp, "\n");
+    }
+
+    *col = written;
+}
+
+/*
+ * Print to stderr the index_array entries of the two contigs flanking a
+ * gap, tab separated on one line. For a contig that is the larger one of
+ * its twin pair, the twin's entry is printed instead.
+ */
+static void outputGapInfo(unsigned int ctg1, unsigned int ctg2)
+{
+    unsigned int twin1 = getTwinCtg(ctg1);
+    unsigned int twin2 = getTwinCtg(ctg2);
+    unsigned int left = isLargerThanTwin(ctg1) ? twin1 : ctg1;
+    unsigned int right = isLargerThanTwin(ctg2) ? twin2 : ctg2;
+
+    fprintf(stderr, "%d\t", index_array[left]);
+    fprintf(stderr, "%d\n", index_array[right]);
+}
+
+/*
+ * Write one record for the gap between prevCtg and actg to fo.
+ *
+ * Header: ">S<scafIndex>_C<id1>_L<outputlen1>_G<gapSeqLen>_C<id2>_L<outputlen2>",
+ * where the ids are index_array entries (of the twin when the contig is the
+ * larger one of its pair). Body: at most CTGappend bases from the end of
+ * prevCtg, the gap sequence in lower case, then at most CTGappend bases from
+ * the start of actg.
+ */
+static void output1gap(FILE *fo, int scafIndex, CTGinSCAF *prevCtg, CTGinSCAF *actg, DARRAY *gapSeqArray)
+{
+ unsigned int ctg1, bal_ctg1, length1;
+ int start1, outputlen1;
+ unsigned int ctg2, bal_ctg2, length2;
+ int start2, outputlen2;
+ char *pt;
+ int column = 0;
+
+ ctg1 = prevCtg->ctgID;
+ bal_ctg1 = getTwinCtg(ctg1);
+ start1 = prevCtg->cutHead;
+ length1 = contig_array[ctg1].length + overlaplen;
+
+ // NOTE(review): length1 is unsigned, so this difference is presumably
+ // evaluated in unsigned arithmetic and would wrap instead of going
+ // negative if cutTail + start1 exceeded length1 -- confirm field types.
+ if(length1 - prevCtg->cutTail - start1 > CTGappend)
+ {
+ // left flank is longer than CTGappend: keep only its last CTGappend bases
+ outputlen1 = CTGappend;
+ start1 = length1 - prevCtg->cutTail - outputlen1;
+ }
+ else
+ outputlen1 = length1 - prevCtg->cutTail - start1;
+
+ ctg2 = actg->ctgID;
+ bal_ctg2 = getTwinCtg(ctg2);
+ start2 = actg->cutHead;
+ length2 = contig_array[ctg2].length + overlaplen;
+
+ // right flank: keep at most the first CTGappend bases after cutHead
+ if(length2 - actg->cutTail - start2 > CTGappend)
+ {
+ outputlen2 = CTGappend;
+ }
+ else
+ outputlen2 = length2 - actg->cutTail - start2;
+
+ if(isLargerThanTwin(ctg1))
+ fprintf(fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[bal_ctg1], outputlen1, prevCtg->gapSeqLen);
+ else
+ fprintf(fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[ctg1], outputlen1, prevCtg->gapSeqLen);
+
+ if(isLargerThanTwin(ctg2))
+ fprintf(fo, "_C%d_L%d\n", index_array[bal_ctg2], outputlen2);
+ else
+ fprintf(fo, "_C%d_L%d\n", index_array[ctg2], outputlen2);
+
+ // the sequence is stored on only one contig of a twin pair; when it sits
+ // on the twin it is written reversed (revS = 1)
+ if(contig_array[ctg1].seq)
+ outputTightStr(fo, contig_array[ctg1].seq, start1, length1, outputlen1, 0, &column);
+ else if(contig_array[bal_ctg1].seq)
+ outputTightStr(fo, contig_array[bal_ctg1].seq, start1, length1, outputlen1, 1, &column);
+
+ // darrayPut() is used here only to obtain the address at gapSeqOffset
+ pt = (char *)darrayPut(gapSeqArray, prevCtg->gapSeqOffset);
+ outputTightStrLowerCase(fo, pt, 0, prevCtg->gapSeqLen, prevCtg->gapSeqLen, 0, &column);
+
+ if(contig_array[ctg2].seq)
+ outputTightStr(fo, contig_array[ctg2].seq, start2, length2, outputlen2, 0, &column);
+ else if(contig_array[bal_ctg2].seq)
+ outputTightStr(fo, contig_array[bal_ctg2].seq, start2, length2, outputlen2, 1, &column);
+
+ fprintf(fo, "\n");
+}
+
+/*
+ * Walk the contigs of one scaffold (after stackRecover) and call
+ * output1gap() for every adjacent pair whose left contig carries a gap
+ * sequence (gapSeqLen > 0).
+ */
+static void outputGapSeq(FILE *fo, int index, STACK *ctgsStack, DARRAY *gapSeqArray)
+{
+    CTGinSCAF *left = NULL;
+    CTGinSCAF *right;
+
+    stackRecover(ctgsStack);
+
+    for(right = stackPop(ctgsStack); right != NULL; right = stackPop(ctgsStack))
+    {
+        if(left != NULL && left->gapSeqLen > 0)
+            output1gap(fo, index, left, right, gapSeqArray);
+
+        left = right;
+    }
+}
+
+/*
+ * Write one scaffold to fo: a ">scaffold<index> <coverage>" header followed
+ * by the contig sequences, with runs of N between contigs that start a new
+ * scaftig and lower-case gap sequence where one is recorded.
+ *
+ * The contig stack is traversed twice (stackRecover before each pass).
+ * Ncounter is incremented for every run of N written.
+ */
+static void outputScafSeq(FILE *fo, int index, STACK *ctgsStack, DARRAY *gapSeqArray)
+{
+ CTGinSCAF *actg, *prevCtg = NULL;
+ unsigned int ctg, bal_ctg, length;
+ int start, outputlen, gapN;
+ char *pt;
+ int column = 0;
+ long long cvgSum = 0;
+ int lenSum = 0;
+
+ stackRecover(ctgsStack);
+
+ // pass 1: length-weighted coverage over contigs with cvg > 0
+ while((actg = stackPop(ctgsStack)) != NULL)
+ {
+ if(!(contig_array[actg->ctgID].cvg > 0))
+ continue;
+
+ lenSum += contig_array[actg->ctgID].length;
+ cvgSum += contig_array[actg->ctgID].length * contig_array[actg->ctgID].cvg;
+ }
+
+ if(lenSum > 0)
+ fprintf(fo, ">scaffold%d %4.1f\n", index, (double)cvgSum / lenSum);
+ else
+ fprintf(fo, ">scaffold%d 0.0\n", index);
+
+ stackRecover(ctgsStack);
+
+ // pass 2: write the sequence
+ while((actg = stackPop(ctgsStack)) != NULL)
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg(ctg);
+ length = contig_array[ctg].length + overlaplen;
+
+ if(prevCtg && actg->scaftig_start)
+ {
+ // unfilled gap: at least one N is always written
+ gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
+ gapN = gapN > 0 ? gapN : 1;
+ outputNs(fo, gapN, &column);
+ //outputGapInfo(prevCtg->ctgID,ctg);
+ Ncounter++;
+ }
+
+ // the first contig is written from its first base; later ones skip cutHead bases
+ if(!prevCtg)
+ start = 0;
+ else
+ start = actg->cutHead;
+
+ outputlen = length - start - actg->cutTail;
+
+ // sequence is stored on one contig of a twin pair; the twin's copy is written reversed
+ if(contig_array[ctg].seq)
+ outputTightStr(fo, contig_array[ctg].seq, start, length, outputlen, 0, &column);
+ else if(contig_array[bal_ctg].seq)
+ outputTightStr(fo, contig_array[bal_ctg].seq, start, length, outputlen, 1, &column);
+
+ if(actg->gapSeqLen < 1)
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ // gap sequence recorded after this contig: append it in lower case
+ pt = (char *)darrayPut(gapSeqArray, actg->gapSeqOffset);
+ outputTightStrLowerCase(fo, pt, 0, actg->gapSeqLen, actg->gapSeqLen, 0, &column);
+
+ prevCtg = actg;
+ }
+
+ fprintf(fo, "\n");
+
+}
+
+static void fill1scaf(int index, STACK *ctgsStack, int thrdID);
+
+/*
+ * Try to claim buffer slot t for thread thrdID and, on success, fill the
+ * gaps of the scaffold held in that slot.
+ *
+ * flagBuf[t] is tested once without the lock as a quick rejection and again
+ * under the mutex; only the thread that flips it from 0 to 1 records its ID
+ * in thrdNoBuf[t], bumps its counter and calls fill1scaf().
+ */
+static void check1scaf(int t, int thrdID)
+{
+    boolean claimed = 0;
+
+    if(flagBuf[t])
+        return;
+
+    pthread_mutex_lock(&mutex);
+
+    if(!flagBuf[t])
+    {
+        flagBuf[t] = 1;
+        thrdNoBuf[t] = thrdID;
+        claimed = 1;
+    }
+
+    pthread_mutex_unlock(&mutex);
+
+    if(!claimed) // another thread got there first
+        return;
+
+    counters[thrdID]++;
+    fill1scaf(scafCounter + t + 1, ctgStackBuffer[t], thrdID);
+}
+
+/*
+ * Try to close the gaps of one scaffold with the reads attached to its
+ * contigs.
+ *
+ *   index     - scaffold number (used in the diagnostic message only)
+ *   ctgsStack - contigs of the scaffold
+ *   thrdID    - selects darrayBuf[thrdID], which is handed to localGraph()
+ *
+ * Steps: count the reads on unmasked contigs, collect them with positions
+ * converted to scaffold coordinates, sort them by position, then for every
+ * contig that starts a new scaftig call localGraph() on the reads near the
+ * gap with decreasing overlap sizes until it reports success.
+ */
+static void fill1scaf(int index, STACK *ctgsStack, int thrdID)
+{
+
+ CTGinSCAF *actg, *prevCtg = NULL;
+ READNEARBY *rdArray, *rdArray4gap, *rd;
+ int numRd = 0, count, maxGLen = 0;
+ unsigned int ctg, bal_ctg;
+ STACK *rdStack;
+
+ // pass 1: count reads on unmasked contigs and find the largest gap
+ while((actg = stackPop(ctgsStack)) != NULL)
+ {
+ if(prevCtg)
+ maxGLen = maxGLen < (actg->start - prevCtg->end) ? (actg->start - prevCtg->end) : maxGLen;
+
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg(ctg);
+
+ if(actg->mask)
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ // reads are stored on one contig of a twin pair only
+ if(contig_array[ctg].closeReads)
+ numRd += contig_array[ctg].closeReads->item_c;
+ else if(contig_array[bal_ctg].closeReads)
+ numRd += contig_array[bal_ctg].closeReads->item_c;
+
+ prevCtg = actg;
+ }
+
+ if(numRd < 1)
+ return;
+
+ rdArray = (READNEARBY *)ckalloc(numRd * sizeof(READNEARBY));
+ rdArray4gap = (READNEARBY *)ckalloc(numRd * sizeof(READNEARBY));
+ //fprintf(stderr,"scaffold%d reads4gap %d\n",index,numRd);
+
+ // collect reads appended to contigs in this scaffold
+ int numRd2 = 0;
+ stackRecover(ctgsStack);
+
+ while((actg = stackPop(ctgsStack)) != NULL)
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg(ctg);
+
+ if(actg->mask)
+ continue;
+
+ if(contig_array[ctg].closeReads)
+ rdStack = contig_array[ctg].closeReads;
+ else if(contig_array[bal_ctg].closeReads)
+ rdStack = contig_array[bal_ctg].closeReads;
+ else
+ continue;
+
+ // traverse the read stack and restore it afterwards
+ stackBackup(rdStack);
+
+ while((rd = (READNEARBY *)stackPop(rdStack)) != NULL)
+ {
+ rdArray[numRd2].len = rd->len;
+ rdArray[numRd2].seqStarter = rd->seqStarter;
+
+ // convert the contig-relative position to a scaffold position;
+ // mirrored when this contig is not the smaller one of its pair
+ if(isSmallerThanTwin(ctg))
+ rdArray[numRd2++].dis = actg->start - overlaplen + rd->dis;
+ else
+ rdArray[numRd2++].dis = actg->start - overlaplen +
+ contig_array[ctg].length - rd->len - rd->dis;
+ }
+
+ stackRecover(rdStack);
+ }
+
+ if(numRd2 != numRd)
+ printf("##reads numbers doesn't match, %d vs %d when scaffold %d\n", numRd, numRd2, index);
+
+ // cutRdArray() relies on this ordering to stop scanning early
+ qsort(rdArray, numRd, sizeof(READNEARBY), cmp_reads);
+ //fill gap one by one
+ int gapStart, gapEnd;
+ int numIn = 0;
+ boolean flag;
+ int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
+ int maxGSLen = maxGLen + GLDiff < 10 ? 10 : maxGLen + GLDiff;
+ //fprintf(stderr,"maxGlen %d, maxGSlen %d\n",maxGLen,maxGSLen);
+
+ char *seqGap = (char *)ckalloc(maxGSLen * sizeof(char)); // temp array for gap sequence
+ Kmer *kmerCtg1 = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+ Kmer *kmerCtg2 = (Kmer *)ckalloc(buffer_size * sizeof(Kmer));
+ char *seqCtg1 = (char *)ckalloc(buffer_size * sizeof(char));
+ char *seqCtg2 = (char *)ckalloc(buffer_size * sizeof(char));
+ prevCtg = NULL;
+ stackRecover(ctgsStack);
+
+ while((actg = stackPop(ctgsStack)) != NULL)
+ {
+ // only contigs that begin a new scaftig have an open gap in front of them
+ if(!prevCtg || !actg->scaftig_start)
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ // search window: the gap widened by 100 on both sides
+ gapStart = prevCtg->end - 100;
+ gapEnd = actg->start - overlaplen + 100;
+
+ cutRdArray(rdArray, gapStart, gapEnd, &count, numRd, rdArray4gap);
+
+ numIn += count;
+ /*
+ if(!count){
+ prevCtg = actg;
+ continue;
+ }
+ */
+ int overlap;
+
+ // retry with a smaller overlap (steps of 2, down to 15) until localGraph() returns 1
+ for(overlap = overlaplen; overlap > 14; overlap -= 2)
+ {
+
+ flag = localGraph(rdArray4gap, count, prevCtg, actg,
+ overlaplen, kmerCtg1, kmerCtg2, overlap, darrayBuf[thrdID],
+ seqCtg1, seqCtg2, seqGap);
+
+ //free_kmerset(kmerSet);
+
+ if(flag == 1)
+ {
+ /*
+ fprintf(stderr,"Between ctg %d and %d, Found with %d\n",prevCtg->ctgID
+ ,actg->ctgID,overlap);
+ */
+ break;
+ }
+ }
+
+ /*
+ if(count==0)
+ printf("Gap closed without reads\n");
+ if(!flag)
+ fprintf(stderr,"Between ctg %d and %d, NO routes found\n",prevCtg->ctgID,actg->ctgID);
+ */
+
+ prevCtg = actg;
+ }
+
+ //fprintf(stderr,"____scaffold%d reads in gap %d\n",index,numIn);
+ free((void *)seqGap);
+ free((void *)kmerCtg1);
+ free((void *)kmerCtg2);
+ free((void *)seqCtg1);
+ free((void *)seqCtg2);
+ free((void *)rdArray);
+ free((void *)rdArray4gap);
+}
+
+/*
+ * Empty dStack, then move every entry popped from sStack onto it (which
+ * reverses their order), copying all CTGinSCAF fields. dStack is backed up
+ * at the end so that it can be traversed repeatedly with stackRecover().
+ */
+static void reverseStack(STACK *dStack, STACK *sStack)
+{
+    CTGinSCAF *src, *dst;
+
+    emptyStack(dStack);
+
+    for(src = (CTGinSCAF *)stackPop(sStack); src != NULL; src = (CTGinSCAF *)stackPop(sStack))
+    {
+        dst = (CTGinSCAF *)stackPush(dStack);
+        // identity and placement
+        dst->ctgID = src->ctgID;
+        dst->start = src->start;
+        dst->end = src->end;
+        dst->scaftig_start = src->scaftig_start;
+        dst->mask = src->mask;
+        // trimming and gap sequence
+        dst->cutHead = src->cutHead;
+        dst->cutTail = src->cutTail;
+        dst->gapSeqLen = src->gapSeqLen;
+        dst->gapSeqOffset = src->gapSeqOffset;
+    }
+
+    stackBackup(dStack);
+}
+
+/*
+ * Build the kmer of overlaplen bases from a packed sequence of `length`
+ * bases.
+ *
+ *   revS == 0: bases start .. start + overlaplen - 1, as stored
+ *   revS != 0: bases read backwards from length - 1 - start, each passed
+ *              through int_comp()
+ *
+ * Returns 0 and prints a message when fewer than overlaplen bases are
+ * available.
+ */
+static Kmer tightStr2Kmer(char *tightStr, int start, int length, int revS)
+{
+    Kmer word = 0;
+    int pos;
+
+    if(revS)
+    {
+        if(length - start - overlaplen < 0)
+        {
+            printf("tightStr2Kmer B: no enough bases for kmer\n");
+            return word;
+        }
+
+        pos = length - 1 - start;
+
+        while(pos > length - 1 - start - overlaplen)
+        {
+            word <<= 2;
+            word += int_comp(getCharInTightString(tightStr, pos));
+            pos--;
+        }
+
+        return word;
+    }
+
+    if(start + overlaplen > length)
+    {
+        printf("tightStr2Kmer A: no enough bases for kmer\n");
+        return word;
+    }
+
+    pos = start;
+
+    while(pos < start + overlaplen)
+    {
+        word <<= 2;
+        word += getCharInTightString(tightStr, pos);
+        pos++;
+    }
+
+    return word;
+}
+
+/*
+ * Return the kmer in which all overlaplen two-bit positions are set to 3.
+ */
+static Kmer maxKmer()
+{
+    Kmer word = 0;
+    int done = 0;
+
+    while(done < overlaplen)
+    {
+        word <<= 2;
+        word += 0x3;
+        done++;
+    }
+
+    return word;
+}
+/*
+ * Look for an overlap between the end of prev_ctg and the start of ctg.
+ *
+ * The last kmer of prev_ctg is compared with the first kmer of ctg; while
+ * they differ, the leading base of the end kmer and the trailing base of
+ * the start kmer are dropped, up to ten times.
+ *
+ * Returns overlaplen minus the number of bases dropped when a match is
+ * found, 0 otherwise (also when either contig has length 0).
+ */
+static int contigCatch(unsigned int prev_ctg, unsigned int ctg)
+{
+    if(contig_array[prev_ctg].length == 0 || contig_array[ctg].length == 0)
+        return 0;
+
+    Kmer tailKmer, headKmer;
+    Kmer mask;
+    unsigned int prevTwin = getTwinCtg(prev_ctg);
+    unsigned int curTwin = getTwinCtg(ctg);
+    int dropped, start;
+    int prevLen = contig_array[prev_ctg].length + overlaplen;
+    int curLen = contig_array[ctg].length + overlaplen;
+
+    // last kmer of the previous contig (taken from the twin when only it holds the sequence)
+    start = contig_array[prev_ctg].length;
+    tailKmer = contig_array[prev_ctg].seq
+               ? tightStr2Kmer(contig_array[prev_ctg].seq, start, prevLen, 0)
+               : tightStr2Kmer(contig_array[prevTwin].seq, start, prevLen, 1);
+
+    // first kmer of the current contig
+    start = 0;
+    headKmer = contig_array[ctg].seq
+               ? tightStr2Kmer(contig_array[ctg].seq, start, curLen, 0)
+               : tightStr2Kmer(contig_array[curTwin].seq, start, curLen, 1);
+
+    mask = MAXKMER;
+    dropped = 0;
+
+    while(dropped < 10 && (headKmer ^ tailKmer) != 0)
+    {
+        mask >>= 2;
+        tailKmer &= mask;
+        headKmer >>= 2;
+        dropped++;
+    }
+
+    if(dropped >= 10)
+        return 0;
+
+    return overlaplen - dropped;
+}
+
+
+/*
+ * Create one contig stack per buffer slot and set each slot's flag to 1,
+ * so that check1scaf() ignores it until a scaffold is placed there.
+ */
+static void initStackBuf(STACK **ctgStackBuffer, int scafBufSize)
+{
+    int slot = 0;
+
+    while(slot < scafBufSize)
+    {
+        flagBuf[slot] = 1;
+        ctgStackBuffer[slot] = (STACK *)createStack(100, sizeof(CTGinSCAF));
+        slot++;
+    }
+}
+/*
+ * Release the contig stack of every buffer slot. The pointer array itself
+ * is left to the caller.
+ */
+static void freeStackBuf(STACK **ctgStackBuffer, int scafBufSize)
+{
+    int slot = 0;
+
+    while(slot < scafBufSize)
+    {
+        freeStack(ctgStackBuffer[slot]);
+        slot++;
+    }
+}
+
+/*
+ * Worker thread body. para points to this thread's PARAMETER.
+ *
+ * The thread polls its own signal byte:
+ *   1 - clear this thread's darray, try to claim and process every
+ *       buffered scaffold, then reset the signal to 0
+ *   2 - reset the signal to 0 and leave the loop
+ * Resetting the signal to 0 is what sendWorkSignal() waits for.
+ *
+ * NOTE(review): the signal byte is accessed from two threads through a
+ * plain pointer, with no atomics or lock visible here -- confirm that this
+ * is acceptable on the target compilers.
+ */
+static void threadRoutine(void *para)
+{
+ PARAMETER *prm;
+ int i;
+
+ prm = (PARAMETER *)para;
+
+ //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
+ while(1)
+ {
+ if(*(prm->selfSignal) == 1)
+ {
+ emptyDarray(darrayBuf[prm->threadID]);
+
+ // every worker scans all slots; check1scaf() lets only one of them win each slot
+ for(i = 0; i < scafInBuf; i++)
+ check1scaf(i, prm->threadID);
+
+ *(prm->selfSignal) = 0;
+ }
+ else if(*(prm->selfSignal) == 2)
+ {
+ *(prm->selfSignal) = 0;
+ break;
+ }
+
+ // idle briefly between polls
+ usleep(1);
+ }
+}
+
+/*
+ * Start thrd_num worker threads running threadRoutine(), thread i receiving
+ * &paras[i]. Prints a message and exits the process if a thread cannot be
+ * created.
+ */
+static void creatThrds(pthread_t *threads, PARAMETER *paras)
+{
+    // int counter: an unsigned char would wrap at 255 and never reach a larger thrd_num
+    int i;
+
+    for(i = 0; i < thrd_num; i++)
+    {
+        if(pthread_create(&threads[i], NULL, (void *)threadRoutine, &(paras[i])) != 0)
+        {
+            printf("create threads failed\n");
+            exit(1);
+        }
+    }
+}
+
+/*
+ * Post signal SIG to every worker and wait until all of them have
+ * acknowledged it.
+ *
+ * thrdSignals[t + 1] is the signal byte of worker t (slot 0 is not touched
+ * here). The function returns only after every worker has reset its byte
+ * to 0, polling every 10 microseconds; it never returns if no worker is
+ * running.
+ */
+static void sendWorkSignal(unsigned char SIG, unsigned char *thrdSignals)
+{
+ int t;
+
+ for(t = 0; t < thrd_num; t++)
+ thrdSignals[t + 1] = SIG;
+
+ while(1)
+ {
+ usleep(10);
+
+ // stop at the first worker that has not cleared its signal yet
+ for(t = 0; t < thrd_num; t++)
+ if(thrdSignals[t + 1])
+ break;
+
+ // the scan ran to the end: every worker is done
+ if(t == thrd_num)
+ break;
+ }
+}
+
+/*
+ * Join every worker thread whose handle is non-zero.
+ */
+static void thread_wait(pthread_t *threads)
+{
+    int t = 0;
+
+    while(t < thrd_num)
+    {
+        if(threads[t] != 0)
+            pthread_join(threads[t], NULL);
+
+        t++;
+    }
+}
+
+/*
+ * Write the first scafInBuf buffered scaffolds: the scaffold sequence to fo
+ * and its gap records to fo2. Scaffold numbers continue from scafCounter;
+ * the gap sequences are read from the darray of the thread recorded for
+ * the slot in thrdNoBuf.
+ */
+static void outputSeqs(FILE *fo, FILE *fo2, int scafInBuf)
+{
+    int slot = 0;
+    int owner;
+
+    while(slot < scafInBuf)
+    {
+        owner = thrdNoBuf[slot];
+        outputScafSeq(fo, scafCounter + slot + 1, ctgStackBuffer[slot], darrayBuf[owner]);
+        outputGapSeq(fo2, scafCounter + slot + 1, ctgStackBuffer[slot], darrayBuf[owner]);
+        slot++;
+    }
+}
+
+/*
+ * Set the mask field of a contig and of its twin.
+ */
+static void MaskContig(unsigned int ctg)
+{
+    unsigned int twin;
+
+    contig_array[ctg].mask = 1;
+    twin = getTwinCtg(ctg);
+    contig_array[twin].mask = 1;
+}
+
+/*
+ * Set the flag field of a contig and of its twin, marking both as used.
+ */
+static void MarkCtgOccu(unsigned int ctg)
+{
+    unsigned int twin;
+
+    contig_array[ctg].flag = 1;
+    twin = getTwinCtg(ctg);
+    contig_array[twin].flag = 1;
+}
+
+/*
+ * Write one contig to fo as ">C<id> <coverage>" followed by its sequence,
+ * then flag the contig and its twin as written.
+ *
+ * The sequence is taken from the contig itself or, if it has none, from its
+ * twin (in which case the twin's ID is printed). Nothing at all is done for
+ * a contig of length < 1; if neither contig holds a sequence only the
+ * flags are set and a newline is written.
+ */
+static void output_ctg(unsigned int ctg, FILE *fo)
+{
+    if(contig_array[ctg].length < 1)
+        return;
+
+    unsigned int twin = getTwinCtg(ctg);
+    unsigned int shown = ctg;
+    char *packed = contig_array[ctg].seq;
+    int fullLen = contig_array[ctg].length + overlaplen;
+    int column = 0;
+
+    if(!packed)
+    {
+        shown = twin;
+        packed = contig_array[twin].seq;
+    }
+
+    if(packed)
+    {
+        fprintf(fo, ">C%d %4.1f\n", shown, (double)contig_array[ctg].cvg);
+        outputTightStr(fo, packed, 0, fullLen, fullLen, 0, &column);
+    }
+
+    contig_array[ctg].flag = 1;
+    contig_array[twin].flag = 1;
+    fprintf(fo, "\n");
+}
+
+/*
+ * Open "<graphfile><suffix>" through ckopen(). Exits with a message when
+ * the combined name does not fit the local buffer.
+ */
+static FILE *openGraphFile(char *graphfile, char *suffix, char *mode)
+{
+    char name[1024];
+    int n = snprintf(name, sizeof(name), "%s%s", graphfile, suffix);
+
+    if(n < 0 || (size_t)n >= sizeof(name))
+    {
+        printf("file name too long: %s%s\n", graphfile, suffix);
+        exit(1);
+    }
+
+    return ckopen(name, mode);
+}
+
+/*
+ * Read "<graphfile>.scaf_gap" and write the scaffold sequences to
+ * "<graphfile>.scafSeq" and the gap records to "<graphfile>.gapSeq".
+ * Contigs of at least 100 bases that appear in no scaffold are appended to
+ * the .scafSeq file as ">C<id>" records.
+ *
+ * Gap filling by worker threads is switched off in this variant: no thread
+ * is started, 'G' lines are skipped and contig overlap detection is
+ * disabled, so every gap is written as a run of N.
+ *
+ * Input lines:
+ *   ">scaffold<index> <counter> <overallLen>"  starts a scaffold
+ *   "G..."                                       gap line, ignored
+ *   "<ctg> <start> ..."                          contig placed in the scaffold
+ * A contig line that cannot be parsed or names a contig outside
+ * 1..num_ctg is reported on stderr and skipped.
+ */
+void prlReadsCloseGap(char *graphfile)
+{
+    FILE *fp, *fo, *fo2;
+    char line[1024];
+    CTGinSCAF *actg;
+    STACK *ctgStack, *aStack;
+    int index = 0, offset = 0, counter, overallLen;
+    int i, starter, prev_start = 0, gapLen = 0, catchable;
+    unsigned int ctg, prev_ctg = 0;
+    boolean IsPrevGap = 0;
+
+    if(orig2new)
+    {
+        convertIndex();
+        orig2new = 0;
+    }
+
+    for(ctg = 1; ctg <= num_ctg; ctg++)
+        contig_array[ctg].flag = 0;
+
+    MAXKMER = maxKmer();
+
+    ctgStack = (STACK *)createStack(1000, sizeof(CTGinSCAF));
+
+    fp = openGraphFile(graphfile, ".scaf_gap", "r");
+    fo = openGraphFile(graphfile, ".scafSeq", "w");
+    fo2 = openGraphFile(graphfile, ".gapSeq", "w");
+
+    pthread_mutex_init(&mutex, NULL);
+
+    flagBuf = (boolean *)ckalloc(scafBufSize * sizeof(boolean));
+    thrdNoBuf = (unsigned char *)ckalloc(scafBufSize * sizeof(unsigned char));
+    memset(thrdNoBuf, 0, scafBufSize * sizeof(char));
+
+    ctgStackBuffer = (STACK **)ckalloc(scafBufSize * sizeof(STACK *));
+    initStackBuf(ctgStackBuffer, scafBufSize);
+
+    darrayBuf = (DARRAY **)ckalloc(thrd_num * sizeof(DARRAY *));
+    counters = (int *)ckalloc(thrd_num * sizeof(int));
+
+    // The per-thread arrays are handed to the output routines and freed at
+    // the end, so they must exist even though no worker thread is started.
+    for(i = 0; i < thrd_num; i++)
+    {
+        counters[i] = 0;
+        darrayBuf[i] = (DARRAY *)createDarray(100000, sizeof(char));
+    }
+
+    Ncounter = scafCounter = scafInBuf = allGaps = 0;
+
+    while(fgets(line, sizeof(line), fp) != NULL)
+    {
+        if(line[0] == '>')
+        {
+            if(index)
+            {
+                // store the scaffold just finished in the next buffer slot
+                aStack = ctgStackBuffer[scafInBuf];
+                flagBuf[scafInBuf++] = 0;
+                reverseStack(aStack, ctgStack);
+
+                if(scafInBuf == scafBufSize)
+                {
+                    // buffer full: write it out and start over
+                    outputSeqs(fo, fo2, scafInBuf);
+                    scafCounter += scafInBuf;
+                    scafInBuf = 0;
+                }
+            }
+
+            //read next scaff
+            emptyStack(ctgStack);
+            IsPrevGap = offset = prev_ctg = 0;
+            sscanf(line + 9, "%d %d %d", &index, &counter, &overallLen);
+            continue;
+        }
+
+        if(line[0] == 'G') // gap appears; gap lines are not used while gap filling is off
+            continue;
+
+        if(line[0] >= '0' && line[0] <= '9') // a contig line
+        {
+            // the contig ID indexes contig_array, so it must be validated first
+            if(sscanf(line, "%u %d", &ctg, &starter) != 2 || ctg < 1 || ctg > num_ctg)
+            {
+                fprintf(stderr, "prlReadsCloseGap: skip malformed contig line: %s", line);
+                continue;
+            }
+
+            actg = (CTGinSCAF *)stackPush(ctgStack);
+            actg->ctgID = ctg;
+
+            // a contig seen before is masked, a new one is marked as used
+            if(contig_array[ctg].flag)
+                MaskContig(ctg);
+            else
+                MarkCtgOccu(ctg);
+
+            initiateCtgInScaf(actg);
+
+            if(!prev_ctg)
+                actg->cutHead = 0;
+            else if(!IsPrevGap)
+                allGaps++;
+
+            if(!IsPrevGap)
+            {
+                if(prev_ctg && (starter - prev_start - (int)contig_array[prev_ctg].length)
+                        < ((int)overlaplen * 4))
+                {
+                    // overlap detection (contigCatch) is disabled in this variant
+                    catchable = 0;
+
+                    if(catchable) // prev_ctg and ctg overlap **bp
+                    {
+                        allGaps--;
+                        actg->scaftig_start = 0;
+                        actg->cutHead = catchable;
+                        offset += - (starter - prev_start - contig_array[prev_ctg].length) +
+                                  (overlaplen - catchable);
+                    }
+                    else
+                        actg->scaftig_start = 1;
+                }
+                else
+                    actg->scaftig_start = 1;
+            }
+            else
+            {
+                offset += - (starter - prev_start - contig_array[prev_ctg].length) + gapLen;
+                actg->scaftig_start = 0;
+            }
+
+            actg->start = starter + offset;
+            actg->end = actg->start + contig_array[ctg].length - 1;
+            actg->mask = contig_array[ctg].mask;
+            IsPrevGap = 0;
+            prev_ctg = ctg;
+            prev_start = starter;
+        }
+    }
+
+    if(index)
+    {
+        // last scaffold of the file, plus whatever is still buffered.
+        // No work signal is sent here: no worker thread exists to answer
+        // it, so waiting for one would never return.
+        aStack = ctgStackBuffer[scafInBuf];
+        flagBuf[scafInBuf++] = 0;
+        reverseStack(aStack, ctgStack);
+        outputSeqs(fo, fo2, scafInBuf);
+    }
+
+    // contigs that were placed in no scaffold
+    for(ctg = 1; ctg <= num_ctg; ctg++)
+    {
+        if((contig_array[ctg].length + overlaplen) < 100 ||
+                contig_array[ctg].flag)
+            continue;
+
+        output_ctg(ctg, fo);
+    }
+
+    for(i = 0; i < thrd_num; i++)
+        freeDarray(darrayBuf[i]);
+
+    // both arrays are released unconditionally
+    free((void *)darrayBuf);
+    free((void *)counters);
+
+    if(readSeqInGap)
+    {
+        freeDarray(readSeqInGap);
+        readSeqInGap = NULL; // guard against a second release on a later call
+    }
+
+    fclose(fp);
+    fclose(fo);
+    fclose(fo2);
+    freeStack(ctgStack);
+    freeStackBuf(ctgStackBuffer, scafBufSize);
+    free((void *)flagBuf);
+    free((void *)thrdNoBuf);
+    free((void *)ctgStackBuffer);
+    pthread_mutex_destroy(&mutex);
+}
diff --git a/fusion/read2scaf.c b/fusion/read2scaf.c
new file mode 100644
index 0000000..05dda65
--- /dev/null
+++ b/fusion/read2scaf.c
@@ -0,0 +1,294 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+// Ncounter and allGaps are only reset to 0 in locateContigOnscaff(); nothing
+// else in the visible part of this file uses them.
+static int Ncounter;
+static int allGaps;
+
+// for multi threads
+// Number of scaffold slots in ctgStackBuffer.
+static int scafBufSize = 100;
+// ctgStackBuffer[s]: contig stack of the scaffold held in slot s.
+static STACK **ctgStackBuffer;
+// Number of scaffolds already handed to mapCtg2Scaf() in earlier batches.
+static int scafCounter;
+// Number of scaffolds currently held in the buffer.
+static int scafInBuf;
+
+/*
+ * Replace index_array by its inverse mapping over 1..num_ctg: for every i
+ * with index_array[i] = j > 0, index_array[j] becomes i afterwards. Slots
+ * that no entry maps to become 0.
+ */
+static void convertIndex()
+{
+    int *inverse = (int *)ckalloc((num_ctg + 1) * sizeof(int));
+    unsigned int k;
+
+    k = 1;
+
+    while(k <= num_ctg)
+        inverse[k++] = 0;
+
+    // build the inverse in scratch space; index_array is still being read here
+    k = 1;
+
+    while(k <= num_ctg)
+    {
+        if(index_array[k] > 0)
+            inverse[index_array[k]] = k;
+
+        k++;
+    }
+
+    // copy back: contig i with new index index_array[i]
+    k = 1;
+
+    while(k <= num_ctg)
+    {
+        index_array[k] = inverse[k];
+        k++;
+    }
+
+    free((void *)inverse);
+}
+
+/*
+ * Empty dStack, then move every entry popped from sStack onto it (which
+ * reverses their order), copying ctgID, start and end only. dStack is
+ * backed up at the end.
+ */
+static void reverseStack(STACK *dStack, STACK *sStack)
+{
+    CTGinSCAF *src, *dst;
+
+    emptyStack(dStack);
+
+    for(src = (CTGinSCAF *)stackPop(sStack); src != NULL; src = (CTGinSCAF *)stackPop(sStack))
+    {
+        dst = (CTGinSCAF *)stackPush(dStack);
+        dst->ctgID = src->ctgID;
+        dst->start = src->start;
+        dst->end = src->end;
+    }
+
+    stackBackup(dStack);
+}
+
+/*
+ * Create one contig stack per buffer slot.
+ */
+static void initStackBuf(STACK **ctgStackBuffer, int scafBufSize)
+{
+    int slot = 0;
+
+    while(slot < scafBufSize)
+    {
+        ctgStackBuffer[slot] = (STACK *)createStack(100, sizeof(CTGinSCAF));
+        slot++;
+    }
+}
+/*
+ * Release the contig stack of every buffer slot. The pointer array itself
+ * is left to the caller.
+ */
+static void freeStackBuf(STACK **ctgStackBuffer, int scafBufSize)
+{
+    int slot = 0;
+
+    while(slot < scafBufSize)
+    {
+        freeStack(ctgStackBuffer[slot]);
+        slot++;
+    }
+}
+
+/*
+ * Record, for every contig in the first scafInBuf buffered scaffolds, which
+ * scaffold it belongs to and where.
+ *
+ * For a contig seen for the first time, from_vt is set to the scaffold
+ * number and to_vt to the contig's start, on the contig (flag 0) and on its
+ * twin (flag 1). A contig that already has a scaffold assigned is marked
+ * multi on both strands and otherwise left unchanged.
+ */
+static void mapCtg2Scaf(int scafInBuf)
+{
+    int slot, scafID;
+    CTGinSCAF *entry;
+    STACK *stack;
+    unsigned int ctg, twin;
+
+    for(slot = 0; slot < scafInBuf; slot++)
+    {
+        scafID = scafCounter + slot + 1;
+        stack = ctgStackBuffer[slot];
+
+        for(entry = stackPop(stack); entry != NULL; entry = stackPop(stack))
+        {
+            ctg = entry->ctgID;
+            twin = getTwinCtg(ctg);
+
+            if(contig_array[ctg].from_vt != 0)
+            {
+                // already placed in a scaffold: ambiguous
+                contig_array[ctg].multi = 1;
+                contig_array[twin].multi = 1;
+                continue;
+            }
+
+            contig_array[ctg].from_vt = scafID;
+            contig_array[ctg].to_vt = entry->start;
+            contig_array[ctg].flag = 0; //ctg and scaf on the same strand
+            contig_array[twin].from_vt = scafID;
+            contig_array[twin].to_vt = entry->start;
+            contig_array[twin].flag = 1;
+        }
+    }
+}
+
+/*
+ * Read "<graphfile>.scaf_gap" and record for every contig the scaffold it
+ * lies on and its start position there (see mapCtg2Scaf()).
+ *
+ * Positions are rebuilt from the file: the first contig of a scaffold
+ * starts at 0 and spans length + overlaplen bases; each later contig
+ * starts after a gap of at least 1 base. The scaffold index and every
+ * contig's start/end are echoed to stderr.
+ *
+ * A contig line that cannot be parsed or names a contig outside
+ * 1..num_ctg is reported on stderr and skipped. The process exits if the
+ * file name does not fit the local buffer.
+ */
+static void locateContigOnscaff(char *graphfile)
+{
+    FILE *fp;
+    char line[1024];
+    CTGinSCAF *actg;
+    STACK *ctgStack, *aStack;
+    int index = 0, counter, overallLen;
+    int starter, prev_start = 0, gapN, scafLen = 0;
+    unsigned int ctg, prev_ctg = 0;
+    int n;
+
+    for(ctg = 1; ctg <= num_ctg; ctg++)
+    {
+        contig_array[ctg].from_vt = 0;
+        contig_array[ctg].multi = 0;
+    }
+
+    ctgStack = (STACK *)createStack(1000, sizeof(CTGinSCAF));
+
+    // bounded formatting: graphfile is caller supplied and may be long
+    n = snprintf(line, sizeof(line), "%s.scaf_gap", graphfile);
+
+    if(n < 0 || (size_t)n >= sizeof(line))
+    {
+        printf("file name too long: %s.scaf_gap\n", graphfile);
+        exit(1);
+    }
+
+    fp = ckopen(line, "r");
+
+    ctgStackBuffer = (STACK **)ckalloc(scafBufSize * sizeof(STACK *));
+    initStackBuf(ctgStackBuffer, scafBufSize);
+
+    Ncounter = scafCounter = scafInBuf = allGaps = 0;
+
+    while(fgets(line, sizeof(line), fp) != NULL)
+    {
+        if(line[0] == '>')
+        {
+            if(index)
+            {
+                // store the scaffold just finished in the next buffer slot
+                aStack = ctgStackBuffer[scafInBuf++];
+                reverseStack(aStack, ctgStack);
+
+                if(scafInBuf == scafBufSize)
+                {
+                    mapCtg2Scaf(scafInBuf);
+                    scafCounter += scafInBuf;
+                    scafInBuf = 0;
+                }
+            }
+
+            //read next scaff
+            scafLen = prev_ctg = 0;
+            emptyStack(ctgStack);
+            sscanf(line + 9, "%d %d %d", &index, &counter, &overallLen);
+            fprintf(stderr, ">%d\n", index);
+            continue;
+        }
+
+        if(line[0] == 'G') // gap appears
+            continue;
+
+        if(line[0] >= '0' && line[0] <= '9') // a contig line
+        {
+            // the contig ID indexes contig_array, so it must be validated first
+            if(sscanf(line, "%u %d", &ctg, &starter) != 2 || ctg < 1 || ctg > num_ctg)
+            {
+                fprintf(stderr, "locateContigOnscaff: skip malformed contig line: %s", line);
+                continue;
+            }
+
+            actg = (CTGinSCAF *)stackPush(ctgStack);
+            actg->ctgID = ctg;
+
+            if(!prev_ctg)
+            {
+                // first contig of the scaffold keeps its leading overlaplen bases
+                actg->start = scafLen;
+                actg->end = actg->start + overlaplen + contig_array[ctg].length - 1;
+            }
+            else
+            {
+                // gaps are never shorter than one base
+                gapN = starter - prev_start - (int)contig_array[prev_ctg].length;
+                gapN = gapN < 1 ? 1 : gapN;
+                actg->start = scafLen + gapN;
+                actg->end = actg->start + contig_array[ctg].length - 1;
+            }
+
+            fprintf(stderr, "%d\t%d\n", actg->start, actg->end);
+            scafLen = actg->end + 1;
+            prev_ctg = ctg;
+            prev_start = starter;
+        }
+    }
+
+    if(index)
+    {
+        // last scaffold of the file, plus whatever is still buffered
+        aStack = ctgStackBuffer[scafInBuf++];
+        reverseStack(aStack, ctgStack);
+        mapCtg2Scaf(scafInBuf);
+    }
+
+    fclose(fp);
+    freeStack(ctgStack);
+    freeStackBuf(ctgStackBuffer, scafBufSize);
+    free((void *)ctgStackBuffer);
+}
+
+/*
+ * Return 1 when the contig (given by its number before index conversion)
+ * lies on exactly one scaffold, i.e. it has a scaffold assigned and is not
+ * marked multi; 0 otherwise.
+ */
+static boolean contigElligible(unsigned int contigno)
+{
+    unsigned int ctg = index_array[contigno];
+
+    if(contig_array[ctg].from_vt != 0 && contig_array[ctg].multi != 1)
+        return 1;
+
+    return 0;
+}
+/*
+ * Write one line "readno<TAB>scaffold<TAB>position<TAB>orientation" to fo.
+ *
+ * contigno is translated through index_array; a negative pos is treated as
+ * 0. For a contig on the scaffold's strand (flag 0) the position is
+ * to_vt + pos - overlaplen with orientation '+'; otherwise it is
+ * to_vt + length - pos with orientation '-'.
+ */
+static void output1read(FILE *fo, long long readno, unsigned int contigno, int pos)
+{
+    unsigned int ctg = index_array[contigno];
+    int posOnScaf;
+    char orien;
+
+    if(pos < 0)
+        pos = 0;
+
+    if(contig_array[ctg].flag != 0)
+    {
+        orien = '-';
+        posOnScaf = contig_array[ctg].to_vt + contig_array[ctg].length - pos;
+    }
+    else
+    {
+        orien = '+';
+        posOnScaf = contig_array[ctg].to_vt + pos - overlaplen;
+    }
+
+    fprintf(fo, "%lld\t%d\t%d\t%c\n", readno, contig_array[ctg].from_vt, posOnScaf, orien);
+}
+
+/*
+ * Translate read-on-contig positions into read-on-scaffold positions.
+ *
+ * Reads "<graphfile>.readOnContig" (first line is skipped; each further
+ * line holds "readno contigno pos") and writes "<graphfile>.readOnScaf".
+ * Only read pairs are written: an even read number directly preceded by
+ * the odd number before it, with both contigs lying on exactly one
+ * scaffold. The number of pairs written is printed at the end.
+ *
+ * Lines that do not contain the three fields are skipped. The process
+ * exits if a file name does not fit the local buffer.
+ */
+void locateReadOnScaf(char *graphfile)
+{
+    char name[1024], line[1024];
+    FILE *fp, *fo;
+    long long readno, counter = 0, pre_readno = 0;
+    unsigned int contigno, pre_contigno = 0;
+    int pre_pos = 0, pos;
+    int n;
+
+    locateContigOnscaff(graphfile);
+
+    // bounded formatting: graphfile is caller supplied and may be long
+    n = snprintf(name, sizeof(name), "%s.readOnContig", graphfile);
+
+    if(n < 0 || (size_t)n >= sizeof(name))
+    {
+        printf("file name too long: %s.readOnContig\n", graphfile);
+        exit(1);
+    }
+
+    fp = ckopen(name, "r");
+    n = snprintf(name, sizeof(name), "%s.readOnScaf", graphfile);
+
+    if(n < 0 || (size_t)n >= sizeof(name))
+    {
+        printf("file name too long: %s.readOnScaf\n", graphfile);
+        exit(1);
+    }
+
+    fo = ckopen(name, "w");
+
+    if(!orig2new)
+    {
+        convertIndex();
+        orig2new = 1;
+    }
+
+    // the first line is skipped; an empty file simply yields no pairs
+    if(fgets(line, sizeof(line), fp) != NULL)
+    {
+        while(fgets(line, sizeof(line), fp) != NULL)
+        {
+            // do not reuse values of an earlier line when this one is malformed
+            if(sscanf(line, "%lld %u %d", &readno, &contigno, &pos) != 3)
+                continue;
+
+            if((readno % 2 == 0) && (pre_readno == readno - 1) // they are a pair of reads
+                    && contigElligible(pre_contigno) && contigElligible(contigno))
+            {
+                output1read(fo, pre_readno, pre_contigno, pre_pos);
+                output1read(fo, readno, contigno, pos);
+                counter++;
+            }
+
+            pre_readno = readno;
+            pre_contigno = contigno;
+            pre_pos = pos;
+        }
+    }
+
+    printf("%lld pairs on contig\n", counter);
+    fclose(fp);
+    fclose(fo);
+}
diff --git a/fusion/readseq1by1.c b/fusion/readseq1by1.c
new file mode 100644
index 0000000..ef089cb
--- /dev/null
+++ b/fusion/readseq1by1.c
@@ -0,0 +1,591 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+static char src_rc_seq[1024];
+extern long long single_count;
+extern long long single_map;
+/*
+ * Read sequence lines from fp until the next '>' header line or end of file.
+ *
+ * Lines starting with '#' are skipped.  Letters are upper-cased and stored
+ * in src_seq through base2int(); a '.' is stored as base2int('A'); every
+ * other character (including the line terminator) is ignored.  At most
+ * maxReadLen codes are stored.
+ *
+ * When a header line is met, *len_seq receives the number of codes stored
+ * so far and the first token after '>' is copied into src_name.
+ * At end of file *len_seq receives the stored count when num_seq >= 0 and
+ * 0 otherwise.
+ */
+void readseq1by1(char *src_seq, char *src_name, int *len_seq, FILE *fp, long long num_seq)
+{
+    char line[5000];
+    int filled = 0;
+    int seqNo = num_seq;
+    int j, take;
+    char sym;
+
+    while(fgets(line, 4950, fp))
+    {
+        if(line[0] == '#')
+            continue;
+
+        if(line[0] == '>')
+        {
+            *len_seq = filled;
+            sscanf(&line[1], "%s", src_name);
+            return;
+        }
+
+        /* never store more than maxReadLen codes in total */
+        take = strlen(line);
+
+        if(take + filled > maxReadLen)
+            take = maxReadLen - filled;
+
+        for(j = 0; j < take; j++)
+        {
+            sym = line[j];
+
+            if(sym >= 'a' && sym <= 'z')
+                sym = sym - 'a' + 'A';
+            else if(sym == '.')
+                sym = 'A';
+            else if(!(sym >= 'A' && sym <= 'Z'))
+                continue;   /* not a sequence symbol */
+
+            src_seq[filled++] = base2int(sym);
+        }
+    }
+
+    /* end of file reached without meeting another header */
+    if(seqNo >= 0)
+        *len_seq = filled;
+    else
+        *len_seq = 0;
+}
+
+
+/*
+ * Load the first sequence of a FASTA stream.
+ *
+ * The stream is scanned once with readseqpar() to obtain the record count
+ * and the longest sequence/name, rewound, and then read with two
+ * readseq1by1() calls: the first consumes input up to the first header,
+ * the second reads the sequence that follows it.
+ *
+ * On success *X points to a buffer obtained from ckalloc() holding the
+ * encoded sequence (the caller owns it) and *T holds its length.
+ * When the stream contains no record, *T is set to 0 and *X is untouched.
+ */
+void read_one_sequence(FILE *fp, long long *T, char **X)
+
+{
+
+    char *fasta, *src_name;   //point to fasta array
+    long long num_seq;
+    /*
+     * readseqpar() compares against these before writing them, so they must
+     * start from defined values; the seeds match the ones multiFileParse() uses.
+     */
+    int len = 1, name_len = 1, min_len = 1000;
+
+    num_seq = readseqpar(&len, &min_len, &name_len, fp);
+
+    if(num_seq < 1)
+    {
+        printf("no fasta sequence in file\n");
+        *T = 0;
+        return;
+    }
+
+    fasta = (char *)ckalloc(len * sizeof(char));
+    src_name = (char *)ckalloc((name_len + 1) * sizeof(char));
+    rewind(fp);
+
+    readseq1by1(fasta, src_name, &len, fp, -1);
+    readseq1by1(fasta, src_name, &len, fp, 0);
+
+    *X = fasta;
+    *T = len;
+    free((void *)src_name);
+}
+
+/*
+ * Treat fp as a list of file names (one per line), run readseqpar() on
+ * each listed file and return the summed record count.
+ *
+ * *max_leg and *max_name_leg are seeded with 1 and *min_leg with 1000
+ * before the first file is scanned; readseqpar() then updates them.
+ *
+ * Trailing '\n' / '\r' characters are removed from each name only when
+ * they are present, so a final line without a newline keeps its last
+ * character.  Lines that are empty after trimming are skipped.
+ */
+long long multiFileParse(int *max_leg, int *min_leg, int *max_name_leg, FILE *fp)
+{
+
+    char str[5000];
+    FILE *freads;
+    size_t slen;
+    long long counter = 0;
+    *max_name_leg = *max_leg = 1;
+    *min_leg = 1000;
+
+    while(fgets(str, 4950, fp))
+    {
+        slen = strlen(str);
+
+        while(slen > 0 && (str[slen - 1] == '\n' || str[slen - 1] == '\r'))
+            str[--slen] = '\0';
+
+        if(slen == 0)
+            continue;
+
+        freads = ckopen(str, "r");
+        counter += readseqpar(max_leg, min_leg, max_name_leg, freads);
+        fclose(freads);
+    }
+
+    return counter;
+}
+
+/*
+ * Scan a FASTA stream and collect size statistics.
+ *
+ * Every line starting with '>' opens a new record; all other lines add
+ * their length (line terminator excluded) to the current record.
+ * *max_leg / *min_leg are raised / lowered to the longest / shortest
+ * record length seen, and *max_name_leg is raised to the longest name
+ * (first token after '>').  The three values are only ever compared and
+ * updated, never reset, so the caller must initialise them.
+ *
+ * Returns the number of '>' header lines found.
+ */
+long long readseqpar(int *max_leg, int *min_leg, int *max_name_leg, FILE *fp)
+{
+    int l, n;
+    long long k;
+    char str[5000], src_name[5000];
+
+
+    n = 0;
+    k = -1;
+
+    while(fgets(str, 4950, fp))
+    {
+        if(str[0] == '>')
+        {
+            if(k >= 0)
+            {
+                if(n > *max_leg)
+                    *max_leg = n;
+
+                if(n < *min_leg)
+                    *min_leg = n;
+
+            }
+
+            n = 0;
+            k ++;
+            /* a bare ">" line leaves sscanf without a match: start from an empty name */
+            src_name[0] = '\0';
+            sscanf(&str[1], "%s", src_name);
+
+            if((l = strlen(src_name)) > *max_name_leg)
+                * max_name_leg = l;
+        }
+        else
+        {
+            /*
+             * Count the characters of the line without its terminator.  The
+             * terminator is absent on a final line without a newline and on
+             * the leading chunks of a line longer than the buffer, so it must
+             * not be subtracted unconditionally.
+             */
+            l = strlen(str);
+
+            while(l > 0 && (str[l - 1] == '\n' || str[l - 1] == '\r'))
+                l--;
+
+            n += l;
+        }
+    }
+
+    /* account for the record that was open when the stream ended */
+    if(n > *max_leg)
+        *max_leg = n;
+
+    if(n < *min_leg)
+        *min_leg = n;
+
+    k ++;
+    return(k);
+}
+
+/*
+ * Read one FASTQ record from fp.
+ *
+ * Input is skipped up to the next line starting with '@'; the first token
+ * after '@' is copied into src_name.  The following lines are encoded into
+ * src_seq (letters through base2int() after upper-casing, '.' as
+ * base2int('A'), everything else ignored, at most maxReadLen codes) until
+ * a line starting with '+' is met, after which exactly one more line (the
+ * quality line) is consumed.
+ *
+ * *len_seq receives the number of stored codes, or 0 when no '@' line
+ * could be found before end of file.
+ */
+void read1seqfq(char *src_seq, char *src_name, int *len_seq, FILE *fp)
+{
+    char line[5000];
+    int filled, j, take;
+    char sym;
+    boolean gotHeader = 0;
+
+    /* look for the record header */
+    while(!gotHeader && fgets(line, 4950, fp))
+    {
+        if(line[0] == '@')
+        {
+            gotHeader = 1;
+            sscanf(&line[1], "%s", src_name);
+        }
+    }
+
+    if(!gotHeader)   // nothing left in the file
+    {
+        *len_seq = 0;
+        return;
+    }
+
+    filled = 0;
+
+    while(fgets(line, 4950, fp))
+    {
+        if(line[0] == '+')
+        {
+            fgets(line, 4950, fp);   // skip the quality line
+            break;
+        }
+
+        /* never store more than maxReadLen codes in total */
+        take = strlen(line);
+
+        if(take + filled > maxReadLen)
+            take = maxReadLen - filled;
+
+        for(j = 0; j < take; j++)
+        {
+            sym = line[j];
+
+            if(sym >= 'a' && sym <= 'z')
+                sym = sym - 'a' + 'A';
+            else if(sym == '.')
+                sym = 'A';
+            else if(!(sym >= 'A' && sym <= 'Z'))
+                continue;   /* not a sequence symbol */
+
+            src_seq[filled++] = base2int(sym);
+        }
+    }
+
+    *len_seq = filled;
+}
+
+// find the next file to open in libs
+//
+// Starting at library libNo, return the index of the first library that
+// still has an unopened file of an acceptable type, or a value >= num_libs
+// when the scan runs off the end of lib_array.
+//
+// asm_ctg selects which libraries are considered:
+//   asm_ctg == 1 : only libraries whose asm_flag is 1 or 3
+//   asm_ctg == 0 : only libraries whose asm_flag is 2 or 3
+//   asm_ctg  > 1 : only libraries whose asm_flag equals asm_ctg
+//
+// curr_type values 1..5 correspond to the file lists a1/a2, q1/q2, p, s_a
+// and s_q (the same mapping openFileInLib uses).  When pair is set only
+// types 1..3 are considered.
+//
+// Side effect: while scanning, curr_type of a library is advanced and its
+// curr_index reset to 0 each time the current type has no file left.
+static int nextValidIndex(int libNo, boolean pair, unsigned char asm_ctg)
+{
+ int i = libNo;
+
+ while(i < num_libs)
+ {
+ // skip libraries that are not meant for the requested stage
+ if(asm_ctg == 1 && (lib_array[i].asm_flag != 1 && lib_array[i].asm_flag != 3))
+ {
+ i++;
+ continue;
+ }
+ else if(asm_ctg == 0 && (lib_array[i].asm_flag != 2 && lib_array[i].asm_flag != 3))
+ {
+ i++;
+ continue;
+ }
+ else if(asm_ctg > 1 && lib_array[i].asm_flag != asm_ctg) // reads for other purpose
+ {
+ i++;
+ continue;
+ }
+
+ // types 1..3: is there a file of the current type that was not opened yet?
+ if(lib_array[i].curr_type == 1 &&
+ lib_array[i].curr_index < lib_array[i].num_a1_file)
+ return i;
+
+ if(lib_array[i].curr_type == 2 &&
+ lib_array[i].curr_index < lib_array[i].num_q1_file)
+ return i;
+
+ if(lib_array[i].curr_type == 3 &&
+ lib_array[i].curr_index < lib_array[i].num_p_file)
+ return i;
+
+ if(pair)
+ {
+ // pair mode stops at type 3: try the next type, or the next library
+ if(lib_array[i].curr_type < 3)
+ {
+ lib_array[i].curr_type++;
+ lib_array[i].curr_index = 0;
+ }
+ else
+ i++;
+
+ continue;
+ }
+
+ // types 4 and 5 are only reached when pair is not set
+ if(lib_array[i].curr_type == 4 &&
+ lib_array[i].curr_index < lib_array[i].num_s_a_file)
+ return i;
+
+ if(lib_array[i].curr_type == 5 &&
+ lib_array[i].curr_index < lib_array[i].num_s_q_file)
+ return i;
+
+ // current type exhausted: try the next type, or the next library
+ if(lib_array[i].curr_type < 5)
+ {
+ lib_array[i].curr_type++;
+ lib_array[i].curr_index = 0;
+ }
+ else
+ i++;
+ }//for each lib
+
+ return i;
+}
+
+/*
+ * Open fname for reading.
+ *
+ * A name ending in ".gz" is read through a "gzip -dc" pipe created with
+ * popen(); such a stream must later be closed with pclose().  Any other
+ * name is opened with ckopen().
+ *
+ * Returns the stream.  For the pipe case the result is NULL when popen()
+ * fails; that failure is now reported on stderr instead of passing silently.
+ *
+ * NOTE(review): the file name is pasted into a shell command unquoted, so
+ * names containing spaces or shell metacharacters will not work as intended.
+ */
+static FILE *openFile4read(char *fname)
+{
+    size_t len = strlen(fname);
+    FILE *fp;
+
+    if(len > 3 && strcmp(fname + len - 3, ".gz") == 0)
+    {
+        size_t cmdSize = len + 20;   /* room for "gzip -dc " and the terminator */
+        char *cmd = (char *)ckalloc(cmdSize * sizeof(char));
+
+        snprintf(cmd, cmdSize, "gzip -dc %s", fname);
+        fp = popen(cmd, "r");
+
+        if(!fp)
+            fprintf(stderr, "[%s]failed to start: %s\n", __FUNCTION__, cmd);
+
+        free(cmd);
+        return fp;
+    }
+
+    return ckopen(fname, "r");
+}
+
+/*
+ * Open the next file (or pair of files) of library libNo.
+ *
+ * The file list is chosen by the library's curr_type and the entry by its
+ * curr_index:
+ *     1 -> a1_fname / a2_fname   (two streams, paired = 1)
+ *     2 -> q1_fname / q2_fname   (two streams, paired = 1)
+ *     3 -> p_fname               (one stream,  paired = 0)
+ *     4 -> s_a_fname             (one stream,  paired = 0)
+ *     5 -> s_q_fname             (one stream,  paired = 0)
+ * The streams are stored in fp1 (and fp2), each file name is announced on
+ * stdout and curr_index is advanced by one.  Any other curr_type leaves the
+ * library untouched.
+ *
+ * The announcement for type 3 used to pass one argument to a format with
+ * two %s conversions; every type now goes through the same printf call.
+ */
+void openFileInLib(int libNo)
+{
+    int idx = lib_array[libNo].curr_index;
+    char *first = NULL, *second = NULL;
+
+    switch(lib_array[libNo].curr_type)
+    {
+        case 1:
+            first = lib_array[libNo].a1_fname[idx];
+            second = lib_array[libNo].a2_fname[idx];
+            break;
+
+        case 2:
+            first = lib_array[libNo].q1_fname[idx];
+            second = lib_array[libNo].q2_fname[idx];
+            break;
+
+        case 3:
+            first = lib_array[libNo].p_fname[idx];
+            break;
+
+        case 4:
+            first = lib_array[libNo].s_a_fname[idx];
+            break;
+
+        case 5:
+            first = lib_array[libNo].s_q_fname[idx];
+            break;
+
+        default:
+            return;
+    }
+
+    /* announce every file before opening, as the per-type code did */
+    printf("[%s]opened file:\n %s\n", __FUNCTION__, first);
+
+    if(second)
+        printf("[%s]opened file:\n %s\n", __FUNCTION__, second);
+
+    lib_array[libNo].fp1 = openFile4read(first);
+
+    if(second)
+        lib_array[libNo].fp2 = openFile4read(second);
+
+    lib_array[libNo].curr_index++;
+    lib_array[libNo].paired = second ? 1 : 0;
+}
+
+/*
+ * Replace src_seq[0..len_seq-1] by its reverse complement, using the
+ * file-level scratch buffer src_rc_seq as intermediate storage.
+ * A length of 0 is a no-op.
+ */
+static void reverse2k(char *src_seq, int len_seq)
+{
+    int k = 0;
+
+    if(len_seq == 0)
+        return;
+
+    reverseComplementSeq(src_seq, len_seq, src_rc_seq);
+
+    /* copy the result back over the input */
+    while(k < len_seq)
+    {
+        src_seq[k] = src_rc_seq[k];
+        k++;
+    }
+}
+
+/*
+ * Close stream fp1 of library libNo.
+ *
+ * The name of the file behind fp1 is looked up from the library's
+ * curr_type and curr_index - 1 (the entry opened last).  Names ending in
+ * ".gz" were opened as pipes and are closed with pclose(), all others with
+ * fclose().  Nothing happens when curr_type is not in 1..5.
+ */
+static void closeFp1InLab(int libNo)
+{
+    int last = lib_array[libNo].curr_index - 1;
+    char *path;
+    size_t n;
+
+    switch(lib_array[libNo].curr_type)
+    {
+        case 1:
+            path = lib_array[libNo].a1_fname[last];
+            break;
+
+        case 2:
+            path = lib_array[libNo].q1_fname[last];
+            break;
+
+        case 3:
+            path = lib_array[libNo].p_fname[last];
+            break;
+
+        case 4:
+            path = lib_array[libNo].s_a_fname[last];
+            break;
+
+        case 5:
+            path = lib_array[libNo].s_q_fname[last];
+            break;
+
+        default:
+            return;
+    }
+
+    n = strlen(path);
+
+    if(n > 3 && strcmp(path + n - 3, ".gz") == 0)
+        pclose(lib_array[libNo].fp1);
+    else
+        fclose(lib_array[libNo].fp1);
+}
+
+/*
+ * Close stream fp2 of library libNo.
+ *
+ * Only curr_type 1 (a2_fname) and 2 (q2_fname) have a second stream; for
+ * any other type the function returns without doing anything.  The entry
+ * at curr_index - 1 names the file: ".gz" names are closed with pclose(),
+ * all others with fclose().
+ */
+static void closeFp2InLab(int libNo)
+{
+    int last = lib_array[libNo].curr_index - 1;
+    char *path;
+    size_t n;
+
+    switch(lib_array[libNo].curr_type)
+    {
+        case 1:
+            path = lib_array[libNo].a2_fname[last];
+            break;
+
+        case 2:
+            path = lib_array[libNo].q2_fname[last];
+            break;
+
+        default:
+            return;
+    }
+
+    n = strlen(path);
+
+    if(n > 3 && strcmp(path + n - 3, ".gz") == 0)
+        pclose(lib_array[libNo].fp2);
+    else
+        fclose(lib_array[libNo].fp2);
+}
+
+/*
+ * Fetch the next read from the libraries, opening files on demand.
+ *
+ * src_seq / src_name / len_seq receive the encoded read, its name and its
+ * length.  *libNo is the library currently being read and is updated when
+ * the function moves on to another one.  pair and asm_ctg are forwarded
+ * to nextValidIndex() to choose which libraries and file types qualify.
+ *
+ * Returns 1 when a read was delivered (n_solexa is incremented for each
+ * one) and 0 when nextValidIndex() finds no further file.
+ *
+ * When the current stream is missing or exhausted, the finished streams
+ * are closed, the next file is located and opened, maxReadLen is set
+ * from the library's rd_len_cutoff (capped by maxReadLen4all) and, when
+ * pair is set and the library changed, a pes[] entry is recorded for
+ * the library just left.  For two-file types (1 and 2) the `paired` field
+ * alternates between 1 and 2 so that consecutive calls read fp1 and fp2
+ * in turn.
+ */
+boolean read1seqInLib(char *src_seq, char *src_name, int *len_seq, int *libNo, boolean pair, unsigned char asm_ctg)
+{
+    int i = *libNo;
+    int prevLib = i;
+
+    if(!lib_array[i].fp1 // file1 does not exist
+            || (lib_array[i].curr_type != 1 && feof(lib_array[i].fp1)) // file1 reaches end and not type1
+            || (lib_array[i].curr_type == 1 && feof(lib_array[i].fp1) && feof(lib_array[i].fp2))) //f1&f2 reaches end
+    {
+        if(lib_array[i].fp1 && feof(lib_array[i].fp1))
+        {
+            closeFp1InLab(i);
+            single_count = single_map = 0;
+        }
+
+        if(lib_array[i].fp2 && feof(lib_array[i].fp2))
+        {
+            closeFp2InLab(i);
+            single_count = single_map = 0;
+        }
+
+        *libNo = nextValidIndex(i, pair, asm_ctg);
+        i = *libNo;
+
+        /*
+         * i may be >= num_libs here (no file left).  lib_array[i] must not be
+         * touched in that case, so fall back to the global limit.
+         */
+        if(i < num_libs && lib_array[i].rd_len_cutoff > 0)
+            maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ?
+                         lib_array[i].rd_len_cutoff : maxReadLen4all;
+        else
+            maxReadLen = maxReadLen4all;
+
+        //record insert size info of the library that was just finished
+        if(pair && i != prevLib)
+        {
+            if(readNumBack < n_solexa)
+            {
+                pes[gradsCounter].PE_bound = n_solexa;
+                pes[gradsCounter].rank = lib_array[prevLib].rank;
+                pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
+                pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
+                readNumBack = n_solexa;
+            }
+        }
+
+        if(i >= num_libs)
+            return 0;
+
+        openFileInLib(i);
+
+        /* FASTA-style types: consume the input up to the first '>' header */
+        if(lib_array[i].curr_type == 1)
+        {
+            readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, -1);
+            readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp2, -1);
+        }
+        else if(lib_array[i].curr_type == 3 || lib_array[i].curr_type == 4)
+            readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, -1);
+
+    }
+
+    if(lib_array[i].curr_type == 1)
+    {
+        if(lib_array[i].paired == 1)
+        {
+            readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, 1);
+
+            if(lib_array[i].reverse)
+                reverse2k(src_seq, *len_seq);
+
+            lib_array[i].paired = 2;
+
+            if(*len_seq > 0 || !feof(lib_array[i].fp1))
+            {
+                n_solexa++;
+                return 1;
+            }
+            else   /* file exhausted: retry, which moves on to the next file */
+                return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
+        }
+        else
+        {
+            readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp2, 1);
+
+            if(lib_array[i].reverse)
+                reverse2k(src_seq, *len_seq);
+
+            lib_array[i].paired = 1;
+            n_solexa++;
+            return 1; //can't fail to read a read2
+        }
+    }
+
+    if(lib_array[i].curr_type == 2)
+    {
+        if(lib_array[i].paired == 1)
+        {
+            read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp1);
+
+            if(lib_array[i].reverse)
+                reverse2k(src_seq, *len_seq);
+
+            lib_array[i].paired = 2;
+
+            if(*len_seq > 0 || !feof(lib_array[i].fp1))
+            {
+                n_solexa++;
+                return 1;
+            }
+            else   /* file exhausted: retry, which moves on to the next file */
+                return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
+        }
+        else
+        {
+            read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp2);
+
+            if(lib_array[i].reverse)
+                reverse2k(src_seq, *len_seq);
+
+            lib_array[i].paired = 1;
+            n_solexa++;
+            return 1; //can't fail to read a read2
+        }
+    }
+
+    /* single-stream types: 5 is read as FASTQ, the others as FASTA */
+    if(lib_array[i].curr_type == 5)
+        read1seqfq(src_seq, src_name, len_seq, lib_array[i].fp1);
+    else
+    {
+        readseq1by1(src_seq, src_name, len_seq, lib_array[i].fp1, 1);
+    }
+
+    if(lib_array[i].reverse)
+        reverse2k(src_seq, *len_seq);
+
+    if(*len_seq > 0 || !feof(lib_array[i].fp1))
+    {
+        n_solexa++;
+        return 1;
+    }
+    else   /* file exhausted: retry, which moves on to the next file */
+        return read1seqInLib(src_seq, src_name, len_seq, libNo, pair, asm_ctg);
+}
diff --git a/fusion/scaffold.c b/fusion/scaffold.c
new file mode 100644
index 0000000..4088aba
--- /dev/null
+++ b/fusion/scaffold.c
@@ -0,0 +1,62 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+static void initenv(int argc, char **argv);
+static void display_scaff_usage();
+
+static boolean LINK, SCAFF;
+
+
+/*
+ * Run the scaffolding stage on the global graphfile prefix.
+ *
+ * Loads the pair-end grads and the updated edges; unless the file-level
+ * SCAFF flag is set it then builds links (PE2Links), turns them into
+ * scaffolds (Links2Scaf) and calls scaffolding(100, ...).  Gap closing
+ * (prlReadsCloseGap) always runs.  Afterwards the memory held by the stage
+ * is released and the elapsed wall-clock time is printed in minutes.
+ *
+ * Always returns 0.
+ */
+int call_scaffold()
+{
+    time_t begin, end, lap_start, lap_end;
+    int minutes;
+
+    time(&begin);
+
+    loadPEgrads(graphfile);
+
+    /* lap timers are still taken, although nothing reports them any more */
+    time(&lap_start);
+    loadUpdatedEdges(graphfile);
+    time(&lap_end);
+
+    if(SCAFF == 0)
+    {
+        time(&lap_start);
+        PE2Links(graphfile);
+        time(&lap_end);
+
+        time(&lap_start);
+        Links2Scaf(graphfile);
+        time(&lap_end);
+
+        scaffolding(100, graphfile);
+    }
+
+    prlReadsCloseGap(graphfile);
+
+    /* release everything the stage allocated */
+    free_pe_mem();
+    free((void *)index_array);   /* free() accepts NULL */
+    freeContig_array();
+    destroyConnectMem();
+    deleteCntLookupTable();
+
+    time(&end);
+    minutes = (int)(end - begin) / 60;
+    printf("[%s]total time on scaffolding : %d minute(s).\n", __FUNCTION__, minutes);
+
+    return 0;
+}
diff --git a/fusion/searchPath.c b/fusion/searchPath.c
new file mode 100644
index 0000000..7692efd
--- /dev/null
+++ b/fusion/searchPath.c
@@ -0,0 +1,205 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+static int trace_limit = 5000; //the times function is called in a search
+/*
+ * Depth-first search along masked, non-deleted connections, starting at
+ * contig currE and looking for contig destE.
+ *
+ * index is the recursion depth (0 for the starting contig) and len the
+ * length accumulated by the caller.  For index > 0 the length of currE is
+ * added, so the length of a route includes its last contig.  A branch is
+ * abandoned when index exceeds max_steps or the length exceeds max.
+ *
+ * Every time destE is reached with a length inside [min, max] the contigs
+ * visited so far (so_far[]) are copied into found_routes[*num_route],
+ * terminated by 0 when shorter than max_steps, and *num_route is
+ * incremented.  The search goes on after a hit.
+ *
+ * The whole search stops once num_trace exceeds trace_limit or
+ * *num_route reaches max_n_routes.
+ */
+void traceAlongMaskedCnt(unsigned int destE, unsigned int currE, int max_steps, int min, int max,
+                         int index, int len, int *num_route)
+{
+    unsigned int *route;
+    int k, pathLen;
+    CONNECT *cnt;
+
+    num_trace++;
+
+    if(num_trace > trace_limit || *num_route >= max_n_routes)
+        return;
+
+    /* the starting contig itself does not count towards the length */
+    if(index <= 0)
+        pathLen = 0;
+    else
+        pathLen = len + contig_array[currE].length;
+
+    if(index > max_steps || pathLen > max)
+        return;
+
+    if(index > 0)   // so_far holds at most max_steps contigs, destination included
+        so_far[index - 1] = currE;
+
+    if(index == 0 && currE == destE)
+    {
+        printf("traceAlongMaskedCnt: start and destination are the same\n");
+        return;
+    }
+
+    if(currE == destE && pathLen >= min && pathLen <= max)
+    {
+        route = found_routes[*num_route];
+
+        for(k = 0; k < index; k++)
+            route[k] = so_far[k];
+
+        if(index < max_steps)
+            route[index] = 0;   // marks the end of the route
+
+        (*num_route)++;
+    }   // a route was extracted, but the search continues
+
+    for(cnt = contig_array[currE].downwardConnect; cnt; cnt = cnt->next)
+    {
+        /* follow masked connections that have not been deleted */
+        if(cnt->mask && !cnt->deleted)
+            traceAlongMaskedCnt(destE, cnt->contigID, max_steps, min, max,
+                                index + 1, pathLen + cnt->gapLen, num_route);
+    }
+}
+/*
+ * Depth-first search from connection currCNT towards contig destE.
+ *
+ * The contig that currCNT points to is visited at depth index; its gap
+ * length and contig length are added to len, so a route's length includes
+ * its last contig.  A branch is abandoned when index exceeds max_steps or
+ * the length exceeds max.
+ *
+ * Reaching destE with a length inside [min, max] copies so_far[] into
+ * found_routes[*num_route] (0-terminated when shorter than max_steps) and
+ * increments *num_route; the search goes on after a hit.
+ *
+ * When currCNT has a nextInScaf link only that link is followed;
+ * otherwise every connection of the contig that is neither masked nor
+ * deleted is tried.  The search stops once num_trace exceeds trace_limit
+ * or *num_route reaches max_n_routes.
+ */
+void traceAlongConnect(unsigned int destE, CONNECT *currCNT, int max_steps, int min, int max, int index, int len, int *num_route)
+{
+    unsigned int *route, here;
+    int k, pathLen;
+    CONNECT *cnt;
+
+    num_trace++;
+
+    if(num_trace > trace_limit || *num_route >= max_n_routes)
+        return;
+
+    here = currCNT->contigID;
+    pathLen = len + currCNT->gapLen;
+    pathLen += contig_array[here].length;
+
+    if(index > max_steps || pathLen > max)
+        return;
+
+    if(index == 1 && here == destE)
+    {
+        printf("traceAlongConnect: start and destination are the same\n");
+        return;
+    }
+
+    so_far[index - 1] = here;   // so_far holds at most max_steps contigs, destination included
+
+    if(here == destE && pathLen >= min && pathLen <= max)
+    {
+        route = found_routes[*num_route];
+
+        for(k = 0; k < index; k++)
+            route[k] = so_far[k];
+
+        if(index < max_steps)
+            route[index] = 0;   // marks the end of the route
+
+        (*num_route)++;
+    }   // a route was extracted, but the search continues
+
+    /* inside a scaffold there is exactly one way forward */
+    if(currCNT->nextInScaf)
+    {
+        traceAlongConnect(destE, currCNT->nextInScaf, max_steps, min, max, index + 1, pathLen, num_route);
+        return;
+    }
+
+    for(cnt = contig_array[here].downwardConnect; cnt; cnt = cnt->next)
+    {
+        if(!cnt->mask && !cnt->deleted)
+            traceAlongConnect(destE, cnt, max_steps, min, max, index + 1, pathLen, num_route);
+    }
+}
+
+/*
+ * Depth-first search along graph arcs from edge currE to edge destE.
+ * The length of a path excludes both end contigs.
+ *
+ * index is the recursion depth (0 for the starting edge) and len the
+ * length accumulated so far.  A branch is abandoned when index exceeds
+ * max_steps or len exceeds max.
+ *
+ * Reaching destE with len >= min copies so_far[] into
+ * found_routes[*num_route] (0-terminated when shorter than max_steps) and
+ * increments *num_route.  Expansion stops at depth max_steps or when len
+ * equals max; otherwise every outgoing arc of currE is followed, adding
+ * the length of currE unless it is the starting edge.
+ *
+ * The search stops once num_trace exceeds trace_limit or *num_route
+ * reaches max_n_routes.
+ */
+void traceAlongArc(unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route)
+{
+    unsigned int *route;
+    int k, nextLen;
+    preARC *arc;
+
+    num_trace++;
+
+    if(num_trace > trace_limit || *num_route >= max_n_routes)
+        return;
+
+    if(index > max_steps || len > max)
+        return;
+
+    if(index == 0 && currE == destE)
+    {
+        printf("traceAlongArc: start and destination are the same\n");
+        return;
+    }
+
+    if(index > 0)   // depth 0 is the starting edge, which is not recorded
+        so_far[index - 1] = currE;
+
+    if(currE == destE && len >= min)
+    {
+        route = found_routes[*num_route];
+
+        for(k = 0; k < index; k++)
+            route[k] = so_far[k];
+
+        if(index < max_steps)
+            route[index] = 0;   // marks the end of the route
+
+        (*num_route)++;
+    }   // a route was extracted, but the search continues
+
+    if(index == max_steps || len == max)
+        return;
+
+    /* the starting edge does not contribute to the path length */
+    if(index > 0)
+        nextLen = len + contig_array[currE].length;
+    else
+        nextLen = len;
+
+    for(arc = contig_array[currE].arcs; arc; arc = arc->next)
+        traceAlongArc(destE, arc->to_ed, max_steps, min, max, index + 1, nextLen, num_route);
+}
diff --git a/fusion/seq.c b/fusion/seq.c
new file mode 100644
index 0000000..3a95639
--- /dev/null
+++ b/fusion/seq.c
@@ -0,0 +1,195 @@
+#include "stdinc.h"
+#include "newhash.h"
+#include "extfunc.h"
+#include "extvab.h"
+
+/*
+Put an insertSize in the grads array;
+if all grads have been entered and all the boundaries have been set, return 0.
+(NOTE: this comment does not describe print_kmer below; it appears to be a leftover.)
+*/
+
+/*
+ * Write kmer to fp followed by the single character c.
+ * A zero kmer is written as the literal text "0x0"; any other value is
+ * written in hexadecimal without a prefix.
+ */
+void print_kmer(FILE *fp, Kmer kmer, char c)
+{
+    if(!kmer)
+        fputs("0x0", fp);
+    else
+        fprintf(fp, "%llx", kmer);
+
+    fputc(c, fp);
+}
+
+/*
+ * Print the first len symbols of a 2-bit packed sequence to stdout,
+ * translating each code with int2base().  A line break is emitted after
+ * every 100 symbols and once more at the end.
+ */
+void printTightString(char *tightSeq, int len)
+{
+    int count;
+
+    for(count = 1; count <= len; count++)
+    {
+        printf("%c", int2base((int)getCharInTightString(tightSeq, count - 1)));
+
+        if(count % 100 == 0)
+            printf("\n");
+    }
+
+    printf("\n");
+}
+
+/*
+ * Reverse-complement a k-mer packed two bits per base in a 64-bit word.
+ *
+ * The XOR with 0xAAAA... flips the high bit of every 2-bit code, which is
+ * the complement in this encoding.  The five swap steps then reverse the
+ * order of the 32 two-bit groups, and the final shift moves the seq_size
+ * meaningful groups back to the low end of the word.
+ *
+ * seq_size is the number of bases held in seq.  For seq_size <= 0 the
+ * result is 0: the final shift would otherwise be by 64 bits or more,
+ * which is undefined for a 64-bit operand.
+ */
+static Kmer fastReverseComp(Kmer seq, char seq_size)
+{
+    if(seq_size <= 0)
+        return 0;
+
+    seq ^= 0xAAAAAAAAAAAAAAAALLU;
+    seq = ((seq & 0x3333333333333333LLU) << 2) | ((seq & 0xCCCCCCCCCCCCCCCCLLU) >> 2);
+    seq = ((seq & 0x0F0F0F0F0F0F0F0FLLU) << 4) | ((seq & 0xF0F0F0F0F0F0F0F0LLU) >> 4);
+    seq = ((seq & 0x00FF00FF00FF00FFLLU) << 8) | ((seq & 0xFF00FF00FF00FF00LLU) >> 8);
+    seq = ((seq & 0x0000FFFF0000FFFFLLU) << 16) | ((seq & 0xFFFF0000FFFF0000LLU) >> 16);
+    seq = ((seq & 0x00000000FFFFFFFFLLU) << 32) | ((seq & 0xFFFFFFFF00000000LLU) >> 32);
+    return seq >> (64 - (seq_size << 1));
+}
+
+/*
+ * Reverse complement of the overlap-base k-mer held in word.
+ * Delegates to fastReverseComp(); an earlier per-nucleotide loop
+ * (mask the low two bits, complement with int_comp, shift) was already
+ * disabled in favour of the bit-parallel version.
+ */
+Kmer reverseComplementVerbose(Kmer word, int overlap)
+{
+    Kmer flipped = fastReverseComp(word, overlap);
+
+    return flipped;
+}
+
+/*
+ * Reverse complement of the overlap-base k-mer held in word
+ * (thin wrapper around fastReverseComp).
+ */
+Kmer reverseComplement(Kmer word, int overlap)
+{
+    Kmer flipped = fastReverseComp(word, overlap);
+
+    return flipped;
+}
+
+/*
+ * Store the 2-bit code nt at position pos of a packed sequence.
+ *
+ * Four codes share one byte; position pos lives in byte pos / 4, with
+ * pos % 4 == 0 occupying the two most significant bits and pos % 4 == 3
+ * the two least significant ones.
+ *
+ * Only the two low bits of nt are stored, so a value above 3 can no longer
+ * spill into the neighbouring codes, and the byte is updated through an
+ * unsigned type.  A negative pos is ignored.
+ */
+void writeChar2tightString(char nt, char *tightSeq, int pos)
+{
+    unsigned char *slot;
+    unsigned int shift, code;
+
+    if(pos < 0)
+        return;
+
+    slot = (unsigned char *)tightSeq + pos / 4;
+    shift = 6u - 2u * (unsigned int)(pos % 4);
+    code = (unsigned int)(unsigned char)nt & 3u;
+
+    /* clear the two target bits, then drop the new code in */
+    *slot = (unsigned char)((*slot & ~(3u << shift)) | (code << shift));
+}
+
+/*
+ * Return the 2-bit code stored at position pos of a packed sequence.
+ * Byte pos / 4 holds the code; pos % 4 == 0 selects the two most
+ * significant bits and pos % 4 == 3 the two least significant ones.
+ * When pos % 4 is negative the result is 0.
+ */
+char getCharInTightString(char *tightSeq, int pos)
+{
+    char *cell = tightSeq + pos / 4;
+    int slot = pos % 4;
+    int shift;
+
+    if(slot < 0)
+        return 0;
+
+    shift = 6 - 2 * slot;
+    /* mask first, then shift: the masked value is never negative */
+    return (*cell & (3 << shift)) >> shift;
+}
+
+/*
+ * Write the reverse complement of seq[0..len-1] into bal_seq.
+ * Each code is complemented with int_comp() and the order is reversed.
+ * bal_seq must have room for len codes; nothing is written when len < 1.
+ */
+void reverseComplementSeq(char *seq, int len, char *bal_seq)
+{
+    int src, dst;
+
+    /* walk the input backwards while filling the output forwards */
+    for(src = len - 1, dst = 0; src >= 0; src--, dst++)
+        bal_seq[dst] = int_comp(seq[src]);
+}
+
+/*
+ * Return a newly allocated reverse complement of seq[0..len-1].
+ * Codes below 4 are complemented with int_comp(); any other code is
+ * copied unchanged.  The buffer comes from ckalloc() and belongs to the
+ * caller.  Returns NULL when len < 1.
+ */
+char *compl_int_seq(char *seq, int len)
+{
+    char *out;
+    char code;
+    int k;
+
+    if(len < 1)
+        return NULL;
+
+    out = (char *)ckalloc(len * sizeof(char));
+
+    for(k = 0; k < len; k++)
+    {
+        code = seq[len - 1 - k];
+
+        if(code < 4)
+            code = int_comp(code);
+
+        out[k] = code;
+    }
+
+    return out;
+}
+
+/*
+ * Fold seq[0..len-1] into one integer, treating the codes as base-4
+ * digits with seq[0] as the most significant one.  Returns 0 for len <= 0.
+ *
+ * The accumulation is done in unsigned arithmetic so that sequences too
+ * long for 64 bits wrap around instead of overflowing a signed integer,
+ * which is undefined behaviour.  For inputs that fit, the result is the
+ * same as before.
+ */
+long long trans_seq(char *seq, int len)
+{
+    unsigned long long acc = 0;
+    int i;
+
+    for(i = 0; i < len; i ++)
+    {
+        acc = acc * 4 + (unsigned long long)(long long)seq[i];
+    }
+
+    return (long long)acc;
+}
+
+/*
+ * Unpack a k-mer into overlaplen separate 2-bit codes.
+ * The lowest two bits of word become the last element of the result.
+ * The buffer comes from ckalloc() and belongs to the caller.
+ */
+char *kmer2seq(Kmer word)
+{
+    char *bases = (char *)ckalloc(overlaplen * sizeof(char));
+    Kmer lowTwoBits = 3;
+    int pos = overlaplen;
+
+    /* peel codes off the low end, filling the array from the back */
+    while(pos-- > 0)
+    {
+        bases[pos] = lowTwoBits & word;
+        word >>= 2;
+    }
+
+    return bases;
+}
diff --git a/fusion/stack.c b/fusion/stack.c
new file mode 100644
index 0000000..1a00c4d
--- /dev/null
+++ b/fusion/stack.c
@@ -0,0 +1,132 @@
+#include "stack.h"
+
+/*
+ * Allocate an empty stack whose storage grows in blocks of num_items
+ * items of unit_size bytes each.  No block is allocated until the first
+ * stackPush().
+ *
+ * Every field is initialised, including index_in_block and the backup
+ * fields, so that stackBackup() is well defined on a fresh stack.
+ * Returns NULL when the allocation fails.  Release with freeStack().
+ */
+STACK *createStack(int num_items, size_t unit_size)
+{
+    STACK *newStack = (STACK *)malloc(1 * sizeof(STACK));
+
+    if(!newStack)
+        return NULL;
+
+    newStack->block_list = NULL;
+    newStack->items_per_block = num_items;
+    newStack->item_size = unit_size;
+    newStack->item_c = 0;
+    newStack->index_in_block = 0;
+    newStack->block_backup = NULL;
+    newStack->index_backup = 0;
+    newStack->item_c_backup = 0;
+    return newStack;
+}
+
+/*
+ * Reset the stack to "no items" without releasing any block.
+ * The current block pointer is moved one step along the next link when
+ * such a block exists; the item count and the index inside the block
+ * are set to 0.  A NULL stack or a stack without blocks is left alone.
+ */
+void emptyStack(STACK *astack)
+{
+    BLOCK_STARTER *target;
+
+    if(astack && astack->block_list)
+    {
+        target = astack->block_list;
+
+        /* steps back by at most one block */
+        if(target->next)
+            target = target->next;
+
+        astack->block_list = target;
+        astack->index_in_block = 0;
+        astack->item_c = 0;
+    }
+}
+
+/*
+ * Release every block of the stack and the stack descriptor itself.
+ * The block list is first followed along the next links to its end and
+ * then freed block by block along the prev links.  NULL is accepted.
+ */
+void freeStack(STACK *astack)
+{
+    BLOCK_STARTER *cursor, *doomed;
+
+    if(astack == NULL)
+        return;
+
+    cursor = astack->block_list;
+
+    /* go to the far end of the chain */
+    while(cursor && cursor->next)
+        cursor = cursor->next;
+
+    /* free on the way back */
+    while(cursor)
+    {
+        doomed = cursor;
+        cursor = cursor->prev;
+        free((void *)doomed);
+    }
+
+    free((void *)astack);
+}
+
+/*
+ * Remember the current position of the stack (current block, index
+ * inside the block and item count) so that stackRecover() can return
+ * to it.  A NULL stack is ignored, like in the other stack functions.
+ */
+void stackBackup(STACK *astack)
+{
+    if(!astack)
+        return;
+
+    astack->block_backup = astack->block_list;
+    astack->index_backup = astack->index_in_block;
+    astack->item_c_backup = astack->item_c;
+}
+
+/*
+ * Restore the position saved by the last stackBackup(): current block,
+ * index inside the block and item count.  A NULL stack is ignored, like
+ * in the other stack functions.
+ */
+void stackRecover(STACK *astack)
+{
+    if(!astack)
+        return;
+
+    astack->block_list = astack->block_backup;
+    astack->index_in_block = astack->index_backup;
+    astack->item_c = astack->item_c_backup;
+}
+
+/*
+ * Remove the top item and return a pointer to its storage, or NULL when
+ * the stack is NULL, has no block or holds no item.
+ *
+ * Items live directly behind the BLOCK_STARTER header of each block.
+ * When the first item of a block is popped, the stack steps to the block
+ * behind the next link (if any) and continues at its last slot; without
+ * such a block the stack becomes empty.  Blocks are never freed here, so
+ * the returned pointer stays valid until the slot is reused or the stack
+ * is freed.
+ *
+ * Item addresses are computed on a char pointer; arithmetic on void * is
+ * a compiler extension rather than standard C.
+ */
+void *stackPop(STACK *astack)
+{
+    BLOCK_STARTER *block;
+    char *items;
+
+    if(!astack || !astack->block_list || !astack->item_c)
+        return NULL;
+
+    astack->item_c--;
+    block = astack->block_list;
+    items = (char *)block + sizeof(BLOCK_STARTER);
+
+    if(astack->index_in_block == 1)
+    {
+        if(block->next)
+        {
+            astack->block_list = block->next;
+            astack->index_in_block = astack->items_per_block;
+        }
+        else
+        {
+            astack->index_in_block = 0;
+            astack->item_c = 0;
+        }
+
+        return (void *)items;
+
+    }
+
+    return (void *)(items + astack->item_size * (--astack->index_in_block));
+}
+
+/*
+ * Reserve the next item slot and return a pointer to it; the caller
+ * writes the item through that pointer.
+ *
+ * When the current block is full, a block kept from earlier use (reached
+ * through the prev link) is reused if there is one; otherwise a new block
+ * is allocated and linked in front of the list.
+ *
+ * Returns NULL when the stack is NULL or when a new block cannot be
+ * allocated; in the latter case the stack is left unchanged.
+ *
+ * Item addresses are computed on a char pointer; arithmetic on void * is
+ * a compiler extension rather than standard C.
+ */
+void *stackPush(STACK *astack)
+{
+    BLOCK_STARTER *block;
+
+    if(!astack)
+        return NULL;
+
+    if(!astack->block_list || (astack->index_in_block == astack->items_per_block && !astack->block_list->prev))
+    {
+        block = malloc(sizeof(BLOCK_STARTER) + astack->items_per_block * astack->item_size);
+
+        if(!block)
+            return NULL;
+
+        astack->item_c++;
+        block->prev = NULL;
+
+        if(astack->block_list)
+            astack->block_list->prev = block;
+
+        block->next = astack->block_list;
+        astack->block_list = block;
+        astack->index_in_block = 1;
+        return (void *)((char *)block + sizeof(BLOCK_STARTER));
+    }
+
+    astack->item_c++;
+
+    if(astack->index_in_block == astack->items_per_block && astack->block_list->prev)
+    {
+        /* current block is full: continue in the block kept from earlier use */
+        astack->block_list = astack->block_list->prev;
+        astack->index_in_block = 1;
+        return (void *)((char *)astack->block_list + sizeof(BLOCK_STARTER));
+    }
+
+    block = astack->block_list;
+    return (void *)((char *)block + sizeof(BLOCK_STARTER) + astack->item_size * astack->index_in_block++);
+
+}
diff --git a/sparsePregraph/Makefile b/sparsePregraph/Makefile
index 9c88a75..0aea312 100644
--- a/sparsePregraph/Makefile
+++ b/sparsePregraph/Makefile
@@ -1,8 +1,8 @@
-CC= g++ # /opt/blc/gcc-4.5.0/bin/gcc #gcc
+CC= g++
ifdef debug
-CFLAGS= -O0 -g -fomit-frame-pointer #-static #-mcrc32 -march=core2 -msse4.1 -msse4.2
+CFLAGS= -O0 -g -fomit-frame-pointer
else
-CFLAGS= -O4 -fomit-frame-pointer #-static #-mcrc32 -march=core2 -msse4.1 -msse4.2
+CFLAGS= -O3 -fomit-frame-pointer -w
endif
DFLAGS=
@@ -37,15 +37,6 @@ EXTRA_FLAGS += -Wl,--hash-style=both
LIBS += -lbam
endif
-ifneq (,$(findstring Unix,$(shell uname)))
-EXTRA_FLAGS += -Wl,--hash-style=both
-LIBS += -lbam -lrt
-endif
-
-ifneq (,$(findstring Darwin,$(shell uname)))
-LIBS += -lbammac
-endif
-
ifneq (,$(findstring $(shell uname -m), x86_64))
CFLAGS += -m64
endif
@@ -61,26 +52,23 @@ endif
.SUFFIXES:.cpp .o
.cpp.o:
- @printf "Compiling $<... \r"; \
- $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< || echo "Error in command: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $<"
+ @printf "Compiling $<... \r"
+ @$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< || echo "Error in command: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $<"
-all: clean $(OBJS)
+all: $(OBJS)
+ @printf "$(PROG) objects generated. \n"
#pregraph_sparse
-.PHONY:all clean install
+.PHONY:all clean
envTest:
@test $(BIT_ERR) != 1 || sh -c 'echo "Fatal: 64bit CPU and Operating System required!";false;'
-pregraph_sparse: clean envTest $(OBJS)
- @printf "Linking... \r"
- #@$(CC) $(CFLAGS)$(INCLUDES) -o $(PROG) $(OBJS) $(LIBPATH) $(LIBS) $(ENTRAFLAGS)
- @printf "$(PROG) compilation done.\n";
+pregraph_sparse: clean envTest $(OBJS)
+ @printf "Linking... \r"
+ @$(CC) $(CFLAGS)$(INCLUDES) -o $(PROG) $(OBJS) $(LIBPATH) $(LIBS) $(ENTRAFLAGS)
+ @printf "$(PROG) compilation done. \n"
clean:
- @rm -fr gmon.out *.o a.out *.exe *.dSYM $(PROG) *~ *.a *.so.* *.so *.dylib
- @printf "$(PROG) cleaning done.\n";
-
-install:
- @cp $(PROG) ../bin/
- @printf "$(PROG) installed at ../bin/$(PROG)\n"
+ @rm -fr gmon.out *.o a.out $(PROG)
+ @printf "$(PROG) cleaning done. \n"
diff --git a/sparsePregraph/build_edge.cpp b/sparsePregraph/build_edge.cpp
index 2b9ceab..d1a0039 100644
--- a/sparsePregraph/build_edge.cpp
+++ b/sparsePregraph/build_edge.cpp
@@ -1,7 +1,7 @@
/*
* build_edge.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -49,192 +49,192 @@ Output:
Return:
None.
*************************************************/
-void RemovingWeakNodesAndEdges2 ( hashtable2 * ht, int K_size, int NodeCovTh, int EdgeCovTh, size_t * bucket_cnt, size_t * edge_cnt )
+void RemovingWeakNodesAndEdges2 ( hashtable2 *ht, int K_size, int NodeCovTh, int EdgeCovTh, size_t *bucket_cnt, size_t *edge_cnt )
{
- stat_edge_num ( ht );
- stat_edge_cvg_len ( ht );
- int Removed_Nodes_cnt = 0, Removed_Edges_cnt = 0;
- bucket2 * bktptr = NULL, *bktptr_tmp = NULL;
- bucket2 ** bktp2p = NULL;
- edge_node * edge_ptr = NULL, *next_edge = NULL, *edge_tmp = NULL;
- int smaller;
- fprintf ( stderr, "Start to remove weak nodes and kmer-edges.\n" );
-
- /*
- for(size_t i=0;i<ht->ht_sz;++i)
- {
- bktptr=ht->store_pos[i];
- while(bktptr!=NULL)
- {
- if(bktptr->kmer_info.cov1==0)printf("zero\n");
-
- bktptr=bktptr->nxt_bucket;
- }
-
- }*/
-
- //removing weak nodes
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- bktptr = ht->store_pos[i];
-
- while ( bktptr != NULL )
- {
- if ( bktptr->kmer_info.cov1 <= NodeCovTh )
- {
- bktptr->kmer_info.deleted = 1;
- Removed_Nodes_cnt++;
- edge_ptr = bktptr->kmer_info.right;
-
- while ( edge_ptr )
- {
- edge_ptr->used = 1;
- edge_ptr = edge_ptr->nxt_edge;
- Removed_Edges_cnt++;
- }
-
- edge_ptr = bktptr->kmer_info.left;
-
- while ( edge_ptr )
- {
- edge_ptr->used = 1;
- edge_ptr = edge_ptr->nxt_edge;
- Removed_Edges_cnt++;
- }
- }
-
- bktptr = bktptr->nxt_bucket;
- }
- }
-
- //removing dead edges
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- bktptr = ht->store_pos[i];
-
- while ( bktptr != NULL )
- {
- edge_ptr = bktptr->kmer_info.right;
-
- while ( edge_ptr )
- {
- if ( edge_ptr->edge_cov <= EdgeCovTh )
- {
- edge_ptr->used = 1; //becasuse the cvg of edges is symmetrial, so it's ok
- Removed_Edges_cnt++;
- }
- else
- {
- bktptr_tmp = lastKmer ( ht, K_size, bktptr, edge_ptr, 0, smaller );
-
- if ( !bktptr_tmp )
- {
- fprintf ( stderr, "ERROR: to node not found error!\n" );
- exit ( -1 );
- }
-
- if ( bktptr_tmp ->kmer_info.deleted )
- {
- edge_ptr->used = 1;
- Removed_Edges_cnt++;
- }
- }
-
- edge_ptr = edge_ptr->nxt_edge;
- }
-
- edge_ptr = bktptr->kmer_info.left;
-
- while ( edge_ptr )
- {
- if ( edge_ptr->edge_cov <= EdgeCovTh )
- {
- edge_ptr->used = 1; //becasuse the cvg of edges is symmetrial, so it's ok
- Removed_Edges_cnt++;
- }
- else
- {
- bktptr_tmp = lastKmer ( ht, K_size, bktptr, edge_ptr, 1, smaller );
-
- if ( !bktptr_tmp )
- {
- fprintf ( stderr, "ERROR: to node not found error! \n" );
- exit ( -1 );
- }
-
- if ( bktptr_tmp ->kmer_info.deleted )
- {
- edge_ptr->used = 1;
- Removed_Edges_cnt++;
- }
- }
-
- edge_ptr = edge_ptr->nxt_edge;
- }
-
- bktptr = bktptr->nxt_bucket;
- }
- }
-
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- bktptr = ht->store_pos[i];
- bktp2p = & ( ht->store_pos[i] );
-
- while ( bktptr != NULL )
- {
- edge_ptr = bktptr->kmer_info.right;
-
- while ( edge_ptr )
- {
- next_edge = edge_ptr->nxt_edge;
-
- if ( edge_ptr->used )
- {
- removeEdge ( bktptr, edge_ptr, 0 );
- //Removed_Edges_cnt2++;
- }
-
- edge_ptr = next_edge;
- }
-
- edge_ptr = bktptr->kmer_info.left;
-
- while ( edge_ptr )
- {
- next_edge = edge_ptr->nxt_edge;
-
- if ( edge_ptr->used )
- {
- removeEdge ( bktptr, edge_ptr, 1 );
- //Removed_Edges_cnt2++;
- }
-
- edge_ptr = next_edge;
- }
-
- bktptr_tmp = bktptr->nxt_bucket;
-
- if ( bktptr->kmer_info.deleted )
- {
- free ( bktptr );
- ( *bktp2p ) = bktptr_tmp;
- //Removed_Nodes_cnt2++;
- }
- else
- {
- bktp2p = & ( bktptr->nxt_bucket );
- }
-
- bktptr = bktptr_tmp;
- }
- }
-
- fprintf ( stderr, "%llu nodes removed.\n", Removed_Nodes_cnt );
- fprintf ( stderr, "%llu edges removed.\n", Removed_Edges_cnt );
- fprintf ( stderr, "\n" );
- ( *bucket_cnt ) -= Removed_Nodes_cnt;
- ( *edge_cnt ) -= Removed_Edges_cnt;
+ stat_edge_num ( ht );
+ stat_edge_cvg_len ( ht );
+ int Removed_Nodes_cnt = 0, Removed_Edges_cnt = 0;
+ bucket2 *bktptr = NULL, *bktptr_tmp = NULL;
+ bucket2 **bktp2p = NULL;
+ edge_node *edge_ptr = NULL, *next_edge = NULL, *edge_tmp = NULL;
+ int smaller;
+ fprintf ( stderr, "Start to remove weak nodes and kmer-edges.\n" );
+
+ /*
+ for(size_t i=0;i<ht->ht_sz;++i)
+ {
+ bktptr=ht->store_pos[i];
+ while(bktptr!=NULL)
+ {
+ if(bktptr->kmer_info.cov1==0)printf("zero\n");
+
+ bktptr=bktptr->nxt_bucket;
+ }
+
+ }*/
+
+ //removing weak nodes
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ bktptr = ht->store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ if ( bktptr->kmer_info.cov1 <= NodeCovTh )
+ {
+ bktptr->kmer_info.deleted = 1;
+ Removed_Nodes_cnt++;
+ edge_ptr = bktptr->kmer_info.right;
+
+ while ( edge_ptr )
+ {
+ edge_ptr->used = 1;
+ edge_ptr = edge_ptr->nxt_edge;
+ Removed_Edges_cnt++;
+ }
+
+ edge_ptr = bktptr->kmer_info.left;
+
+ while ( edge_ptr )
+ {
+ edge_ptr->used = 1;
+ edge_ptr = edge_ptr->nxt_edge;
+ Removed_Edges_cnt++;
+ }
+ }
+
+ bktptr = bktptr->nxt_bucket;
+ }
+ }
+
+ //removing dead edges
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ bktptr = ht->store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ edge_ptr = bktptr->kmer_info.right;
+
+ while ( edge_ptr )
+ {
+ if ( edge_ptr->edge_cov <= EdgeCovTh )
+ {
+ edge_ptr->used = 1; //becasuse the cvg of edges is symmetrial, so it's ok
+ Removed_Edges_cnt++;
+ }
+ else
+ {
+ bktptr_tmp = lastKmer ( ht, K_size, bktptr, edge_ptr, 0, smaller );
+
+ if ( !bktptr_tmp )
+ {
+ fprintf ( stderr, "ERROR: to node not found error!\n" );
+ exit ( -1 );
+ }
+
+ if ( bktptr_tmp ->kmer_info.deleted )
+ {
+ edge_ptr->used = 1;
+ Removed_Edges_cnt++;
+ }
+ }
+
+ edge_ptr = edge_ptr->nxt_edge;
+ }
+
+ edge_ptr = bktptr->kmer_info.left;
+
+ while ( edge_ptr )
+ {
+ if ( edge_ptr->edge_cov <= EdgeCovTh )
+ {
+ edge_ptr->used = 1; //becasuse the cvg of edges is symmetrial, so it's ok
+ Removed_Edges_cnt++;
+ }
+ else
+ {
+ bktptr_tmp = lastKmer ( ht, K_size, bktptr, edge_ptr, 1, smaller );
+
+ if ( !bktptr_tmp )
+ {
+ fprintf ( stderr, "ERROR: to node not found error! \n" );
+ exit ( -1 );
+ }
+
+ if ( bktptr_tmp ->kmer_info.deleted )
+ {
+ edge_ptr->used = 1;
+ Removed_Edges_cnt++;
+ }
+ }
+
+ edge_ptr = edge_ptr->nxt_edge;
+ }
+
+ bktptr = bktptr->nxt_bucket;
+ }
+ }
+
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ bktptr = ht->store_pos[i];
+ bktp2p = & ( ht->store_pos[i] );
+
+ while ( bktptr != NULL )
+ {
+ edge_ptr = bktptr->kmer_info.right;
+
+ while ( edge_ptr )
+ {
+ next_edge = edge_ptr->nxt_edge;
+
+ if ( edge_ptr->used )
+ {
+ removeEdge ( bktptr, edge_ptr, 0 );
+ //Removed_Edges_cnt2++;
+ }
+
+ edge_ptr = next_edge;
+ }
+
+ edge_ptr = bktptr->kmer_info.left;
+
+ while ( edge_ptr )
+ {
+ next_edge = edge_ptr->nxt_edge;
+
+ if ( edge_ptr->used )
+ {
+ removeEdge ( bktptr, edge_ptr, 1 );
+ //Removed_Edges_cnt2++;
+ }
+
+ edge_ptr = next_edge;
+ }
+
+ bktptr_tmp = bktptr->nxt_bucket;
+
+ if ( bktptr->kmer_info.deleted )
+ {
+ free ( bktptr );
+ ( *bktp2p ) = bktptr_tmp;
+ //Removed_Nodes_cnt2++;
+ }
+ else
+ {
+ bktp2p = & ( bktptr->nxt_bucket );
+ }
+
+ bktptr = bktptr_tmp;
+ }
+ }
+
+ fprintf ( stderr, "%llu nodes removed.\n", Removed_Nodes_cnt );
+ fprintf ( stderr, "%llu edges removed.\n", Removed_Edges_cnt );
+ fprintf ( stderr, "\n" );
+ ( *bucket_cnt ) -= Removed_Nodes_cnt;
+ ( *edge_cnt ) -= Removed_Edges_cnt;
}
@@ -253,43 +253,46 @@ Output:
Return:
None.
*************************************************/
-void removeMinorTips ( struct hashtable2 * ht, int K_size, int cut_len_tip, int & tip_c )
+void removeMinorTips ( struct hashtable2 *ht, int K_size, int cut_len_tip, int &tip_c )
{
- mask1in1out ( ht );
- bucket2 * bktptr = NULL;
- size_t flag = 1;
- size_t total = 0;
- int j = 0;
-
- while ( flag )
- {
- flag = 0;
-
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- bktptr = ht->store_pos[i];
-
- while ( bktptr != NULL )
- {
- flag += clipTipFromNode ( ht, K_size, bktptr, cut_len_tip );
- bktptr = bktptr->nxt_bucket;
- }
- }
-
- j++;
-
- if ( flag )
- {
- fprintf ( stderr, "%llu tips removed in cycle %d.\n\n", flag, j );
- total += flag;
- }
- else
- {
- fprintf ( stderr, "Total %llu tips removed.\n", total );
- }
-
- if ( flag ) { mask1in1out ( ht ); }
- }
+ mask1in1out ( ht ); // flag nodes with exactly one left and one right edge as linear
+ bucket2 *bktptr = NULL;
+ size_t flag = 1;
+ size_t total = 0;
+ int j = 0;
+
+ while ( flag ) // repeat until a full pass over the table clips nothing
+ {
+ flag = 0;
+
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ bktptr = ht->store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ flag += clipTipFromNode ( ht, K_size, bktptr, cut_len_tip );
+ bktptr = bktptr->nxt_bucket;
+ }
+ }
+
+ j++;
+
+ if ( flag )
+ {
+ fprintf ( stderr, "%zu tips removed in cycle %d.\n\n", flag, j ); // flag is size_t: %zu, not %llu
+ total += flag;
+ }
+ else
+ {
+ fprintf ( stderr, "Total %zu tips removed.\n", total ); // total is size_t: %zu, not %llu
+ }
+
+ if ( flag )
+ {
+ mask1in1out ( ht ); // edges were removed, so recompute the linear flags
+ }
+ }
}
@@ -305,38 +308,38 @@ Output:
Return:
None.
*************************************************/
-static void mask1in1out ( hashtable2 * ht )
+static void mask1in1out ( hashtable2 *ht )
{
- size_t total = 0, linear = 0;
- static int call_times;
- call_times++;
-
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- struct bucket2 * bkt_ptr = ht->store_pos[i];
-
- while ( bkt_ptr )
- {
- total++;//for stat
-
- if ( ( bkt_ptr->kmer_info.left != NULL && bkt_ptr->kmer_info.left->nxt_edge == NULL )
- && ( bkt_ptr->kmer_info.right != NULL && bkt_ptr->kmer_info.right->nxt_edge == NULL ) )
- {
- bkt_ptr->kmer_info.linear = 1;
- linear++;//for stat
- }
- else
- {
- bkt_ptr->kmer_info.linear = 0;
- }
-
- bkt_ptr = bkt_ptr->nxt_bucket;
- }
- }
-
- //fprintf(stderr,"Masking linear nodes, times: %d\n",call_times);
- fprintf ( stderr, "Total nodes number: %llu\n", total );
- fprintf ( stderr, "Linear nodes number: %llu\n", linear );
+ size_t total = 0, linear = 0;
+ static int call_times;
+ call_times++;
+
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ struct bucket2 *bkt_ptr = ht->store_pos[i];
+
+ while ( bkt_ptr )
+ {
+ total++;//for stat
+
+ if ( ( bkt_ptr->kmer_info.left != NULL && bkt_ptr->kmer_info.left->nxt_edge == NULL )
+ && ( bkt_ptr->kmer_info.right != NULL && bkt_ptr->kmer_info.right->nxt_edge == NULL ) )
+ {
+ bkt_ptr->kmer_info.linear = 1; // exactly one edge on each side
+ linear++;//for stat
+ }
+ else
+ {
+ bkt_ptr->kmer_info.linear = 0;
+ }
+
+ bkt_ptr = bkt_ptr->nxt_bucket;
+ }
+ }
+
+ //fprintf(stderr,"Masking linear nodes, times: %d\n",call_times);
+ fprintf ( stderr, "Total nodes number: %zu\n", total ); // size_t needs %zu, not %llu
+ fprintf ( stderr, "Linear nodes number: %zu\n", linear ); // size_t needs %zu, not %llu
}
@@ -355,220 +358,256 @@ Output:
Return:
1 if clips a tip successfully.
*************************************************/
-static int clipTipFromNode ( hashtable2 * ht, int K_size, bucket2 * node, int cut_len_tip ) //only for remove minor tips
+static int clipTipFromNode ( hashtable2 *ht, int K_size, bucket2 *node, int cut_len_tip ) //only for remove minor tips
{
- //linear return 0
- if ( node->kmer_info.linear || node->kmer_info.deleted )
- {
- return 0;
- }
-
- // for not linear
- int in_num, out_num;
- int sum_edge_len = 0;
- int smaller;
- bool is_left;
- bool pre_is_left;
- edge_node * edge0;
- in_num = count_left_edge_num ( node );
- out_num = count_right_edge_num ( node );
-
- if ( in_num == 0 && out_num == 1 ) { is_left = 0; }
- else if ( in_num == 1 && out_num == 0 ) { is_left = 1; }
- else { return 0; }
-
- if ( is_left ) { edge0 = node->kmer_info.left; }
- else { edge0 = node->kmer_info.right; }
-
- bucket2 * next, *pre_node;
- pre_node = node;
- next = lastKmer ( ht, K_size, node, edge0, is_left, smaller );
-
- while ( next->kmer_info.linear )
- {
- if ( sum_edge_len > cut_len_tip ) { return 0; }
-
- is_left = ! ( is_left ^ smaller );
-
- if ( is_left ) { edge0 = next->kmer_info.left; }
- else { edge0 = next->kmer_info.right; }
-
- sum_edge_len += edge0->len + 1;
- pre_node = next;
- next = lastKmer ( ht, K_size, next, edge0, is_left, smaller );
-
- if ( !next )
- {
- fprintf ( stderr, "ERROR: linear edge not found error !\n" );
- exit ( -1 );
- }
- }
-
- pre_is_left = is_left;
- is_left = ( is_left ^ smaller ); //back check orientation...
- in_num = count_left_edge_num ( next );
- out_num = count_right_edge_num ( next );
-
- if ( is_left ) //check the last node left branch or not
- {
- if ( in_num == 1 )
- {
- return 0;
- }
- else if ( in_num > 1 )
- {
- edge_node * edge1 = NULL, * temp_edge = NULL;
- bucket2 * temp_bucket = NULL;
- int max_cvg = 0, single_cvg = 0, temp_smaller;
- temp_edge = next->kmer_info.left;
-
- while ( temp_edge )
- {
- single_cvg = temp_edge->edge_cov;
-
- if ( single_cvg > max_cvg ) { max_cvg = single_cvg; }
-
- if ( !edge1 )
- {
- temp_bucket = lastKmer ( ht, K_size, next, temp_edge, 1, temp_smaller );
-
- if ( !temp_bucket )
- {
- fprintf ( stderr, "ERROR: edge to NULL found error ! a\n" );
- exit ( 1 );
- }
-
- if ( pre_node == temp_bucket ) { edge1 = temp_edge; }
- }
-
- temp_edge = temp_edge->nxt_edge;
- }
-
- if ( !edge1 )
- {
- fprintf ( stderr, "ERROR: edge to node not found error ! b\n" );
- exit ( 1 );
- }
-
- if ( edge1->edge_cov < max_cvg )
- {
- removeEdge ( next, edge1, 1 );
- removeEdge ( pre_node, edge0, pre_is_left );
- node->kmer_info.deleted = 1;
- pre_node->kmer_info.deleted = 1;
- return 1;
- }
- else
- {
- return 0;
- }
- }
- else
- {
- fprintf ( stderr, "ERROR: left tips oritation error or edge not found error ! a\n" );
- exit ( -1 );
- }
- }
- else
- {
- if ( out_num == 1 )
- {
- return 0;
- }
- else if ( out_num > 1 )
- {
- edge_node * edge1 = NULL, * temp_edge = NULL;
- bucket2 * temp_bucket = NULL;
- int max_cvg = 0, single_cvg = 0, temp_smaller;
- //ok change it to a edge_remove thred_hold locally later
- //or only if it is the least cvg ->remove it
- temp_edge = next->kmer_info.right;
-
- while ( temp_edge )
- {
- single_cvg = temp_edge->edge_cov;
-
- if ( single_cvg > max_cvg ) { max_cvg = single_cvg; }
-
- if ( !edge1 )
- {
- temp_bucket = lastKmer ( ht, K_size, next, temp_edge, 0, temp_smaller );
-
- if ( !temp_bucket )
- {
- fprintf ( stderr, "ERROR: edge to NULL found, error ! b\n" );
- exit ( -1 );
- }
-
- if ( pre_node == temp_bucket ) { edge1 = temp_edge; }
- }
-
- temp_edge = temp_edge->nxt_edge;
- }
-
- if ( !edge1 )
- {
- fprintf ( stderr, "ERROR: edge to node not found error ! e\n" );
- exit ( 1 );
- }
-
- if ( edge1->edge_cov < max_cvg )
- {
- removeEdge ( next, edge1, 0 );
- removeEdge ( pre_node, edge0, pre_is_left );
- node->kmer_info.deleted = 1;
- pre_node->kmer_info.deleted = 1;
- return 1;
- }
- else
- {
- return 0;
- }
- }
- else
- {
- fprintf ( stderr, "ERROR: right tips oritation error or edge not found error! b\n" );
- exit ( -1 );
- }
- }
+ //linear return 0
+ if ( node->kmer_info.linear || node->kmer_info.deleted )
+ {
+ return 0;
+ }
+
+ // for not linear
+ int in_num, out_num;
+ int sum_edge_len = 0;
+ int smaller;
+ bool is_left;
+ bool pre_is_left;
+ edge_node *edge0;
+ in_num = count_left_edge_num ( node );
+ out_num = count_right_edge_num ( node );
+
+ if ( in_num == 0 && out_num == 1 )
+ {
+ is_left = 0;
+ }
+ else if ( in_num == 1 && out_num == 0 )
+ {
+ is_left = 1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ if ( is_left )
+ {
+ edge0 = node->kmer_info.left;
+ }
+ else
+ {
+ edge0 = node->kmer_info.right;
+ }
+
+ bucket2 *next, *pre_node;
+ pre_node = node;
+ next = lastKmer ( ht, K_size, node, edge0, is_left, smaller );
+
+ while ( next && next->kmer_info.linear ) // a NULL first lookup now reaches the "edge not found" exit below instead of being dereferenced
+ {
+ if ( sum_edge_len > cut_len_tip )
+ {
+ return 0;
+ }
+
+ is_left = ! ( is_left ^ smaller );
+
+ if ( is_left )
+ {
+ edge0 = next->kmer_info.left;
+ }
+ else
+ {
+ edge0 = next->kmer_info.right;
+ }
+
+ sum_edge_len += edge0->len + 1;
+ pre_node = next;
+ next = lastKmer ( ht, K_size, next, edge0, is_left, smaller );
+
+ if ( !next )
+ {
+ fprintf ( stderr, "ERROR: linear edge not found error !\n" );
+ exit ( -1 );
+ }
+ }
+
+ pre_is_left = is_left;
+ is_left = ( is_left ^ smaller ); //back check orientation...
+ in_num = count_left_edge_num ( next );
+ out_num = count_right_edge_num ( next );
+
+ if ( is_left ) //check the last node left branch or not
+ {
+ if ( in_num == 1 )
+ {
+ return 0;
+ }
+ else if ( in_num > 1 )
+ {
+ edge_node *edge1 = NULL, * temp_edge = NULL;
+ bucket2 *temp_bucket = NULL;
+ int max_cvg = 0, single_cvg = 0, temp_smaller;
+ temp_edge = next->kmer_info.left;
+
+ while ( temp_edge )
+ {
+ single_cvg = temp_edge->edge_cov;
+
+ if ( single_cvg > max_cvg )
+ {
+ max_cvg = single_cvg;
+ }
+
+ if ( !edge1 )
+ {
+ temp_bucket = lastKmer ( ht, K_size, next, temp_edge, 1, temp_smaller );
+
+ if ( !temp_bucket )
+ {
+ fprintf ( stderr, "ERROR: edge to NULL found error ! a\n" );
+ exit ( 1 );
+ }
+
+ if ( pre_node == temp_bucket )
+ {
+ edge1 = temp_edge;
+ }
+ }
+
+ temp_edge = temp_edge->nxt_edge;
+ }
+
+ if ( !edge1 )
+ {
+ fprintf ( stderr, "ERROR: edge to node not found error ! b\n" );
+ exit ( 1 );
+ }
+
+ if ( edge1->edge_cov < max_cvg )
+ {
+ removeEdge ( next, edge1, 1 );
+ removeEdge ( pre_node, edge0, pre_is_left );
+ node->kmer_info.deleted = 1;
+ pre_node->kmer_info.deleted = 1;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: left tips oritation error or edge not found error ! a\n" );
+ exit ( -1 );
+ }
+ }
+ else
+ {
+ if ( out_num == 1 )
+ {
+ return 0;
+ }
+ else if ( out_num > 1 )
+ {
+ edge_node *edge1 = NULL, * temp_edge = NULL;
+ bucket2 *temp_bucket = NULL;
+ int max_cvg = 0, single_cvg = 0, temp_smaller;
+ //ok change it to a edge_remove thred_hold locally later
+ //or only if it is the least cvg ->remove it
+ temp_edge = next->kmer_info.right;
+
+ while ( temp_edge )
+ {
+ single_cvg = temp_edge->edge_cov;
+
+ if ( single_cvg > max_cvg )
+ {
+ max_cvg = single_cvg;
+ }
+
+ if ( !edge1 )
+ {
+ temp_bucket = lastKmer ( ht, K_size, next, temp_edge, 0, temp_smaller );
+
+ if ( !temp_bucket )
+ {
+ fprintf ( stderr, "ERROR: edge to NULL found, error ! b\n" );
+ exit ( -1 );
+ }
+
+ if ( pre_node == temp_bucket )
+ {
+ edge1 = temp_edge;
+ }
+ }
+
+ temp_edge = temp_edge->nxt_edge;
+ }
+
+ if ( !edge1 )
+ {
+ fprintf ( stderr, "ERROR: edge to node not found error ! e\n" );
+ exit ( 1 );
+ }
+
+ if ( edge1->edge_cov < max_cvg )
+ {
+ removeEdge ( next, edge1, 0 );
+ removeEdge ( pre_node, edge0, pre_is_left );
+ node->kmer_info.deleted = 1;
+ pre_node->kmer_info.deleted = 1;
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: right tips oritation error or edge not found error! b\n" );
+ exit ( -1 );
+ }
+ }
}
-static int count_left_edge_num ( bucket2 * bkt ) //63 127 same
+static int count_left_edge_num ( bucket2 *bkt ) //63 127 same
{
- int ret = 0;
+ int ret = 0;
- if ( bkt )
- {
- edge_node * left_edge = bkt->kmer_info.left;
+ if ( bkt )
+ {
+ edge_node *left_edge = bkt->kmer_info.left;
- while ( left_edge )
- {
- ret++;
- left_edge = left_edge->nxt_edge;
- }
- }
+ while ( left_edge )
+ {
+ ret++;
+ left_edge = left_edge->nxt_edge;
+ }
+ }
- return ret;
+ return ret;
}
-static int count_right_edge_num ( bucket2 * bkt ) //63 127 same
+static int count_right_edge_num ( bucket2 *bkt ) //63 127 same
{
- int ret = 0;
+ int ret = 0;
- if ( bkt )
- {
- edge_node * right_edge = bkt->kmer_info.right;
+ if ( bkt )
+ {
+ edge_node *right_edge = bkt->kmer_info.right;
- while ( right_edge )
- {
- ret++;
- right_edge = right_edge->nxt_edge;
- }
- }
+ while ( right_edge )
+ {
+ ret++;
+ right_edge = right_edge->nxt_edge;
+ }
+ }
- return ret;
+ return ret;
}
@@ -589,236 +628,245 @@ Return:
A pointer to the found node.
Null if not found.
*************************************************/
-static bucket2 * lastKmer ( hashtable2 * ht, int K_size, bucket2 * node, edge_node * edge, int is_left, int & smaller ) //NEW
+static bucket2 *lastKmer ( hashtable2 *ht, int K_size, bucket2 *node, edge_node *edge, int is_left, int &smaller ) //NEW
{
- if ( !node || !edge ) { return NULL; }
-
- kmer_t2 t_kmer, f_kmer;
- t_kmer = node->kmer_t2;
- kmer_t2 edge_seq;
- memset ( edge_seq.kmer, 0, sizeof ( edge_seq ) );
- ( edge_seq.kmer ) [sizeof ( edge_seq ) / sizeof ( uint64_t ) - 1] = edge->edge;
- int edge_len = edge->len + 1;
-
- if ( edge_len > K_size )
- {
- fprintf ( stderr, "ERROR: g value should be no great than kmer size!\n" );
- exit ( -1 );
- }
-
- kmer_t2 KMER_FILTER;
- initKmerFilter ( K_size, &KMER_FILTER );
-
- if ( is_left ) //left edge
- {
- kmerMoveRight ( &t_kmer, edge_len );
- kmerMoveLeft ( &edge_seq, K_size - edge_len );
- kmerOr ( &t_kmer, &edge_seq );
- kmerAnd ( &t_kmer, &KMER_FILTER );
- }
- else
- {
- kmerMoveLeft ( &t_kmer, edge_len );
- kmerOr ( &t_kmer, &edge_seq );
- kmerAnd ( &t_kmer, &KMER_FILTER );
- }
-
- f_kmer = t_kmer;
- reverseCompKmer ( &f_kmer, K_size );
-
- if ( kmerCompare ( &t_kmer, &f_kmer ) > 0 )
- {
- t_kmer = f_kmer;
- smaller = 0;
- }
- else { smaller = 1; }
-
- return search_kmer ( ht, &t_kmer );
+ if ( !node || !edge )
+ {
+ return NULL;
+ }
+
+ kmer_t2 t_kmer, f_kmer;
+ t_kmer = node->kmer_t2;
+ kmer_t2 edge_seq;
+ memset ( edge_seq.kmer, 0, sizeof ( edge_seq ) );
+ ( edge_seq.kmer ) [sizeof ( edge_seq ) / sizeof ( uint64_t ) - 1] = edge->edge;
+ int edge_len = edge->len + 1;
+
+ if ( edge_len > K_size )
+ {
+ fprintf ( stderr, "ERROR: g value should be no great than kmer size!\n" );
+ exit ( -1 );
+ }
+
+ kmer_t2 KMER_FILTER;
+ initKmerFilter ( K_size, &KMER_FILTER );
+
+ if ( is_left ) //left edge
+ {
+ kmerMoveRight ( &t_kmer, edge_len );
+ kmerMoveLeft ( &edge_seq, K_size - edge_len );
+ kmerOr ( &t_kmer, &edge_seq );
+ kmerAnd ( &t_kmer, &KMER_FILTER );
+ }
+ else
+ {
+ kmerMoveLeft ( &t_kmer, edge_len );
+ kmerOr ( &t_kmer, &edge_seq );
+ kmerAnd ( &t_kmer, &KMER_FILTER );
+ }
+
+ f_kmer = t_kmer;
+ reverseCompKmer ( &f_kmer, K_size );
+
+ if ( kmerCompare ( &t_kmer, &f_kmer ) > 0 )
+ {
+ t_kmer = f_kmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ return search_kmer ( ht, &t_kmer );
}
-static bucket2 * search_kmer ( hashtable2 * ht, kmer_t2 * t_kmer )
+static bucket2 *search_kmer ( hashtable2 *ht, kmer_t2 *t_kmer )
{
- uint64_t hv = MurmurHash64A ( t_kmer, sizeof ( kmer_t2 ), 0 );
- size_t hash_idx = ( size_t ) ( hv % ht->ht_sz );
- bucket2 * starter = ht->store_pos[hash_idx];
+ uint64_t hv = MurmurHash64A ( t_kmer, sizeof ( kmer_t2 ), 0 );
+ size_t hash_idx = ( size_t ) ( hv % ht->ht_sz );
+ bucket2 *starter = ht->store_pos[hash_idx];
- while ( starter )
- {
- if ( kmerCompare ( & ( starter->kmer_t2 ), t_kmer ) == 0 )
- {
- return starter;
- }
+ while ( starter )
+ {
+ if ( kmerCompare ( & ( starter->kmer_t2 ), t_kmer ) == 0 )
+ {
+ return starter;
+ }
- starter = starter->nxt_bucket;
- }
+ starter = starter->nxt_bucket;
+ }
- return NULL;
+ return NULL;
}
-static void removeEdge ( bucket2 * node, edge_node * edge, int is_left ) // remove only one side ... //63 127 same ...
+static void removeEdge ( bucket2 *node, edge_node *edge, int is_left ) // remove only one side ... //63 127 same ...
{
- edge_node * pre_edge = NULL, *cur_edge = NULL, *nxt_edge = NULL;
-
- if ( !node || !edge )
- {
- return ;
- }
-
- if ( is_left )
- {
- cur_edge = node->kmer_info.left;
-
- if ( cur_edge == NULL )
- {
- return ;
- }
-
- if ( cur_edge == edge )
- {
- nxt_edge = cur_edge->nxt_edge;
- free ( cur_edge );
- cur_edge = NULL;
- node->kmer_info.left = nxt_edge;
- return ;
- }
- }
- else
- {
- cur_edge = node->kmer_info.right;
-
- if ( cur_edge == NULL )
- {
- return ;
- }
-
- if ( cur_edge == edge )
- {
- nxt_edge = cur_edge->nxt_edge;
- free ( cur_edge );
- cur_edge = NULL;
- node->kmer_info.right = nxt_edge;
- return ;
- }
- }
-
- pre_edge = cur_edge;
- cur_edge = cur_edge->nxt_edge;
-
- while ( cur_edge )
- {
- if ( cur_edge == edge ) { break; }
-
- pre_edge = cur_edge;
- cur_edge = cur_edge->nxt_edge;
- }
-
- if ( cur_edge )
- {
- nxt_edge = cur_edge->nxt_edge;
- free ( cur_edge );
- cur_edge = NULL;
- pre_edge->nxt_edge = nxt_edge;
- }
+ edge_node *pre_edge = NULL, *cur_edge = NULL, *nxt_edge = NULL;
+
+ if ( !node || !edge )
+ {
+ return ;
+ }
+
+ if ( is_left )
+ {
+ cur_edge = node->kmer_info.left;
+
+ if ( cur_edge == NULL )
+ {
+ return ;
+ }
+
+ if ( cur_edge == edge )
+ {
+ nxt_edge = cur_edge->nxt_edge;
+ free ( cur_edge );
+ cur_edge = NULL;
+ node->kmer_info.left = nxt_edge;
+ return ;
+ }
+ }
+ else
+ {
+ cur_edge = node->kmer_info.right;
+
+ if ( cur_edge == NULL )
+ {
+ return ;
+ }
+
+ if ( cur_edge == edge )
+ {
+ nxt_edge = cur_edge->nxt_edge;
+ free ( cur_edge );
+ cur_edge = NULL;
+ node->kmer_info.right = nxt_edge;
+ return ;
+ }
+ }
+
+ pre_edge = cur_edge;
+ cur_edge = cur_edge->nxt_edge;
+
+ while ( cur_edge )
+ {
+ if ( cur_edge == edge )
+ {
+ break;
+ }
+
+ pre_edge = cur_edge;
+ cur_edge = cur_edge->nxt_edge;
+ }
+
+ if ( cur_edge )
+ {
+ nxt_edge = cur_edge->nxt_edge;
+ free ( cur_edge );
+ cur_edge = NULL;
+ pre_edge->nxt_edge = nxt_edge;
+ }
}
-static void stat_edge_num ( hashtable2 * ht ) //63 127 same
+static void stat_edge_num ( hashtable2 *ht ) //63 127 same
{
- int l_num = 0, r_num = 0;
- size_t total_edge_num = 0, total_node_num = 0;
- bucket2 * bkt = NULL;
- map<int, size_t> edge_num_map;
-
- for ( size_t i = 0; i < ht->ht_sz; i++ )
- {
- bkt = ht->store_pos[i];
-
- while ( bkt )
- {
- total_node_num++;
- l_num = count_left_edge_num ( bkt );
- r_num = count_right_edge_num ( bkt );
- total_edge_num += ( l_num + r_num );
- edge_num_map[l_num]++;
- edge_num_map[r_num]++;
- bkt = bkt->nxt_bucket;
- }
- }
-
- ofstream o_edge_num ( "edge_num_stat.txt" );
- o_edge_num << "Total nodes number:" << total_node_num << endl;
- o_edge_num << "Total kmer-edges number:" << total_edge_num << endl;
- o_edge_num << "Average kmer-edges number per node:" << ( double ) total_edge_num / total_node_num << endl;
- o_edge_num << "The frequence of kmer-edges number on a node's one side as below :" << endl;
- map<int, size_t>::iterator it;
-
- for ( it = edge_num_map.begin(); it != edge_num_map.end(); ++it )
- {
- o_edge_num << it->first << "\t" << it->second << endl;
- }
-
- o_edge_num.close();
+ int l_num = 0, r_num = 0;
+ size_t total_edge_num = 0, total_node_num = 0;
+ bucket2 *bkt = NULL;
+ map<int, size_t> edge_num_map;
+
+ for ( size_t i = 0; i < ht->ht_sz; i++ )
+ {
+ bkt = ht->store_pos[i];
+
+ while ( bkt )
+ {
+ total_node_num++;
+ l_num = count_left_edge_num ( bkt );
+ r_num = count_right_edge_num ( bkt );
+ total_edge_num += ( l_num + r_num );
+ edge_num_map[l_num]++;
+ edge_num_map[r_num]++;
+ bkt = bkt->nxt_bucket;
+ }
+ }
+
+ ofstream o_edge_num ( "edge_num_stat.txt" );
+ o_edge_num << "Total nodes number:" << total_node_num << endl;
+ o_edge_num << "Total kmer-edges number:" << total_edge_num << endl;
+ o_edge_num << "Average kmer-edges number per node:" << ( double ) total_edge_num / total_node_num << endl;
+ o_edge_num << "The frequence of kmer-edges number on a node's one side as below :" << endl;
+ map<int, size_t>::iterator it;
+
+ for ( it = edge_num_map.begin(); it != edge_num_map.end(); ++it )
+ {
+ o_edge_num << it->first << "\t" << it->second << endl;
+ }
+
+ o_edge_num.close();
}
-static void stat_edge_cvg_len ( hashtable2 * ht )
+static void stat_edge_cvg_len ( hashtable2 *ht )
{
- map<int, size_t> edge_cvg_map;
- map<int, size_t> edge_len_map;
- bucket2 * bkt = NULL;
- edge_node * temp_edge = NULL;
-
- for ( size_t i = 0; i < ht->ht_sz; i++ )
- {
- bkt = ht->store_pos[i];
-
- while ( bkt )
- {
- //left
- temp_edge = bkt->kmer_info.left;
-
- while ( temp_edge )
- {
- edge_cvg_map[temp_edge->edge_cov]++;
- edge_len_map[temp_edge->len]++;
- temp_edge = temp_edge->nxt_edge;
- }
-
- //right
- temp_edge = bkt->kmer_info.right;
-
- while ( temp_edge )
- {
- edge_cvg_map[temp_edge->edge_cov]++;
- edge_len_map[temp_edge->len]++;
- temp_edge = temp_edge->nxt_edge;
- }
-
- bkt = bkt->nxt_bucket;
- }
- }
-
- ofstream o_edge_cvg ( "edge_cvg_stat.txt" );
- ofstream o_edge_len ( "edge_len_stat.txt" );
- map<int, size_t>::iterator it;
-
- for ( it = edge_cvg_map.begin(); it != edge_cvg_map.end(); ++it )
- {
- o_edge_cvg << it->first << "\t" << it->second << endl;
- }
-
- for ( it = edge_len_map.begin(); it != edge_len_map.end(); ++it )
- {
- o_edge_len << it->first << "\t" << it->second << endl;
- }
-
- o_edge_cvg.close();
- o_edge_len.close();
+ map<int, size_t> edge_cvg_map;
+ map<int, size_t> edge_len_map;
+ bucket2 *bkt = NULL;
+ edge_node *temp_edge = NULL;
+
+ for ( size_t i = 0; i < ht->ht_sz; i++ )
+ {
+ bkt = ht->store_pos[i];
+
+ while ( bkt )
+ {
+ //left
+ temp_edge = bkt->kmer_info.left;
+
+ while ( temp_edge )
+ {
+ edge_cvg_map[temp_edge->edge_cov]++;
+ edge_len_map[temp_edge->len]++;
+ temp_edge = temp_edge->nxt_edge;
+ }
+
+ //right
+ temp_edge = bkt->kmer_info.right;
+
+ while ( temp_edge )
+ {
+ edge_cvg_map[temp_edge->edge_cov]++;
+ edge_len_map[temp_edge->len]++;
+ temp_edge = temp_edge->nxt_edge;
+ }
+
+ bkt = bkt->nxt_bucket;
+ }
+ }
+
+ ofstream o_edge_cvg ( "edge_cvg_stat.txt" );
+ ofstream o_edge_len ( "edge_len_stat.txt" );
+ map<int, size_t>::iterator it;
+
+ for ( it = edge_cvg_map.begin(); it != edge_cvg_map.end(); ++it )
+ {
+ o_edge_cvg << it->first << "\t" << it->second << endl;
+ }
+
+ for ( it = edge_len_map.begin(); it != edge_len_map.end(); ++it )
+ {
+ o_edge_len << it->first << "\t" << it->second << endl;
+ }
+
+ o_edge_cvg.close();
+ o_edge_len.close();
}
@@ -837,37 +885,37 @@ Output:
Return:
None.
*************************************************/
-void kmer2edges ( hashtable2 * ht, int K_size, char * outfile )
+void kmer2edges ( hashtable2 *ht, int K_size, char *outfile )
{
- FILE * fp;
- char temp[256];
- sprintf ( temp, "%s", outfile );
- fp = fopen ( temp, "w" );
-
- if ( fp == NULL )
- {
- fprintf ( stderr, "ERROR: Can't create file %s. \n", temp );
- exit ( -1 );
- }
-
- make_edge ( ht, K_size, fp );
- fclose ( fp );
+ FILE *fp;
+ // Open outfile directly: the former sprintf() copy into a fixed
+ // 256-byte buffer overflowed for paths of 256 bytes or more.
+ fp = fopen ( outfile, "w" );
+
+ if ( fp == NULL )
+ {
+ fprintf ( stderr, "ERROR: Can't create file %s. \n", outfile );
+ exit ( -1 );
+ }
+
+ make_edge ( ht, K_size, fp );
+ fclose ( fp );
}
-static void make_edge ( hashtable2 * ht, int K_size, FILE * fp ) //63 127 same
+static void make_edge ( hashtable2 *ht, int K_size, FILE *fp ) //63 127 same
{
- bucket2 * bktptr;
-
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- bktptr = ht->store_pos[i];
-
- while ( bktptr != NULL )
- {
- startEdgeFromNode ( ht, K_size, bktptr, fp );
- bktptr = bktptr->nxt_bucket;
- }
- }
+ bucket2 *bktptr;
+
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ bktptr = ht->store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ startEdgeFromNode ( ht, K_size, bktptr, fp );
+ bktptr = bktptr->nxt_bucket;
+ }
+ }
}
@@ -889,188 +937,188 @@ Output:
Return:
Zero.
*************************************************/
-static int startEdgeFromNode ( hashtable2 * ht, int K_size, bucket2 * node, FILE * fp )
+static int startEdgeFromNode ( hashtable2 *ht, int K_size, bucket2 *node, FILE *fp )
{
- static size_t call_times;
- call_times++;
-
- if ( node->kmer_info.linear || node->kmer_info.deleted )
- {
- return 0;//linear node ...
- }
-
- int left, right;
- left = count_left_edge_num ( node );
- right = count_right_edge_num ( node );
-
- if ( left == 0 && right == 0 )
- {
- return 0; //it's a dead node
- }
-
- list<stacked_node2 *> stack;
- edge_node * t_edge = NULL, *t_next = NULL;
- stacked_node2 * t_stacked_node = NULL;
- vector<preEDGE2> loops_edges;
- int node_c;
- //for right edge
- t_edge = node->kmer_info.right;
-
- while ( t_edge )
- {
- if ( t_edge->used == 1 )
- {
- t_edge = t_edge->nxt_edge;
- continue;
- }
-
- t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
- t_stacked_node->node = node;
- t_stacked_node->is_left = 0;
- t_stacked_node->edge = t_edge;
- t_stacked_node->next = NULL;
- stack.push_back ( t_stacked_node );
- t_edge->used = 1;
- stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c );
- process_1stack ( ht, K_size, stack, fp, loops_edges );
- t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge
- dislink ( ht, K_size, stack.front() );
-
- if ( stack.size() > 2 )
- {
- stack.pop_back();//change the stack
-
- if ( stack.back() && stack.size() > 1 ) //last but second node
- {
- dislink ( ht, K_size, stack.back() );
- }
- }
-
- stacked_node2 * head, *tmp_node;
- head = stack.front();
-
- while ( head )
- {
- tmp_node = head;
- free ( tmp_node );
- head = head->next;
- }
-
- stack.clear();
- t_edge = t_next;
- }
-
- //for left edge
- t_edge = node->kmer_info.left;
-
- while ( t_edge )
- {
- if ( t_edge->used == 1 )
- {
- t_edge = t_edge->nxt_edge;
- continue;
- }
-
- t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
- t_stacked_node->node = node;
- t_stacked_node->is_left = 1;
- t_stacked_node->edge = t_edge;
- t_stacked_node->next = NULL;
- stack.push_back ( t_stacked_node );
- t_edge->used = 1;
- stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c ); //
- process_1stack ( ht, K_size, stack, fp, loops_edges );
- t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge
- dislink ( ht, K_size, stack.front() );
-
- if ( stack.size() > 2 )
- {
- stack.pop_back();//change the stack
-
- if ( stack.back() && stack.size() > 1 ) //last but second node
- {
- dislink ( ht, K_size, stack.back() );
- }
- }
-
- //debug<<"before free stack"<<endl;
- stacked_node2 * head, *tmp_node;
- head = stack.front();
-
- while ( head )
- {
- tmp_node = head;
- free ( tmp_node );
- head = head->next;
- }
-
- stack.clear();
- t_edge = t_next;
- }
-
- if ( loops_edges.size() > 0 )
- {
- //fprintf(stderr,"loops_edges size %llu\n",loops_edges.size());
- int i, j, size;
- bool need_output;
- size = loops_edges.size();
- need_output = 1;
-
- //bool debug = 0;
- for ( i = 0; i < size; i++ )
- {
- string seq = * ( loops_edges[i].full_edge );
- string rc_seq = revCompSeq ( seq );
- /*
- if(seq.compare("AATTGGACGTGAGAGCAAATTGTATTGAGCATACAATTTGCTCTCACGTCCAATT") == 0) {
- fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str());
- debug = 1;
- }
-
- if(seq.compare("AATTGGACGTGAGAGCAAATTGTATGCTCAATACAATTTGCTCTCACGTCCAATT") == 0) {
- fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str());
- debug = 1;
- }
-
- if(debug ){
- fprintf(stderr, "%d %s\n",i,seq.c_str());
- fprintf(stderr, "%d %s\n",i,rc_seq.c_str());
- }*/
-
- for ( j = i + 1; j < size; j++ )
- {
- string cur_seq = * ( loops_edges[j].full_edge );
-
- if ( seq.compare ( cur_seq ) == 0 )
- {
- fprintf ( stderr, "ERROR: two equal loop edge sequence from same node, this should not happen!\n" );
- fprintf ( stderr, "%s\n", seq.c_str() );
- exit ( -1 );
- }
-
- if ( rc_seq.compare ( cur_seq ) == 0 )
- {
- fprintf ( stderr, "INFO: two loop edge sequence are reversed complemental!\n" );
- fprintf ( stderr, "%s\n", seq.c_str() );
- fprintf ( stderr, "%s\n", rc_seq.c_str() );
- need_output = 0;
- loops_edges[j].cvg += loops_edges[i].cvg;
- break;
- }
- }
-
- if ( need_output )
- {
- output_1edge ( &loops_edges[i], K_size, fp );
- //fprintf(stderr,"need output %d %s\n",i,seq.c_str());
- }
-
- delete ( loops_edges[i].full_edge );
- need_output = 1;
- }
- }
-
- return 0;
+ static size_t call_times;
+ call_times++;
+
+ if ( node->kmer_info.linear || node->kmer_info.deleted )
+ {
+ return 0;//linear node ...
+ }
+
+ int left, right;
+ left = count_left_edge_num ( node );
+ right = count_right_edge_num ( node );
+
+ if ( left == 0 && right == 0 )
+ {
+ return 0; //it's a dead node
+ }
+
+ list<stacked_node2 *> stack;
+ edge_node *t_edge = NULL, *t_next = NULL;
+ stacked_node2 *t_stacked_node = NULL;
+ vector<preEDGE2> loops_edges;
+ int node_c;
+ //for right edge
+ t_edge = node->kmer_info.right;
+
+ while ( t_edge )
+ {
+ if ( t_edge->used == 1 )
+ {
+ t_edge = t_edge->nxt_edge;
+ continue;
+ }
+
+ t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
+ t_stacked_node->node = node;
+ t_stacked_node->is_left = 0;
+ t_stacked_node->edge = t_edge;
+ t_stacked_node->next = NULL;
+ stack.push_back ( t_stacked_node );
+ t_edge->used = 1;
+ stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c );
+ process_1stack ( ht, K_size, stack, fp, loops_edges );
+ t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge
+ dislink ( ht, K_size, stack.front() );
+
+ if ( stack.size() > 2 )
+ {
+ stack.pop_back();//change the stack
+
+ if ( stack.back() && stack.size() > 1 ) //last but second node
+ {
+ dislink ( ht, K_size, stack.back() );
+ }
+ }
+
+ stacked_node2 *head, *tmp_node;
+ head = stack.front();
+
+ while ( head )
+ {
+ tmp_node = head;
+ head = head->next; // advance before freeing: reading ->next after free() was a use-after-free
+ free ( tmp_node );
+ }
+
+ stack.clear();
+ t_edge = t_next;
+ }
+
+ //for left edge
+ t_edge = node->kmer_info.left;
+
+ while ( t_edge )
+ {
+ if ( t_edge->used == 1 )
+ {
+ t_edge = t_edge->nxt_edge;
+ continue;
+ }
+
+ t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
+ t_stacked_node->node = node;
+ t_stacked_node->is_left = 1;
+ t_stacked_node->edge = t_edge;
+ t_stacked_node->next = NULL;
+ stack.push_back ( t_stacked_node );
+ t_edge->used = 1;
+ stringBeads ( ht, K_size, stack, t_stacked_node, t_edge, &node_c ); //
+ process_1stack ( ht, K_size, stack, fp, loops_edges );
+ t_next = t_edge->nxt_edge;//because this procedure will remove the edge t_edge
+ dislink ( ht, K_size, stack.front() );
+
+ if ( stack.size() > 2 )
+ {
+ stack.pop_back();//change the stack
+
+ if ( stack.back() && stack.size() > 1 ) //last but second node
+ {
+ dislink ( ht, K_size, stack.back() );
+ }
+ }
+
+ //debug<<"before free stack"<<endl;
+ stacked_node2 *head, *tmp_node;
+ head = stack.front();
+
+ while ( head )
+ {
+ tmp_node = head;
+ head = head->next; // advance before freeing: reading ->next after free() was a use-after-free
+ free ( tmp_node );
+ }
+
+ stack.clear();
+ t_edge = t_next;
+ }
+
+ if ( loops_edges.size() > 0 )
+ {
+ //fprintf(stderr,"loops_edges size %llu\n",loops_edges.size());
+ int i, j, size;
+ bool need_output;
+ size = loops_edges.size();
+ need_output = 1;
+
+ //bool debug = 0;
+ for ( i = 0; i < size; i++ )
+ {
+ string seq = * ( loops_edges[i].full_edge );
+ string rc_seq = revCompSeq ( seq );
+ /*
+ if(seq.compare("AATTGGACGTGAGAGCAAATTGTATTGAGCATACAATTTGCTCTCACGTCCAATT") == 0) {
+ fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str());
+ debug = 1;
+ }
+
+ if(seq.compare("AATTGGACGTGAGAGCAAATTGTATGCTCAATACAATTTGCTCTCACGTCCAATT") == 0) {
+ fprintf(stderr,"in loops_edges %d %s\n",i,seq.c_str());
+ debug = 1;
+ }
+
+ if(debug ){
+ fprintf(stderr, "%d %s\n",i,seq.c_str());
+ fprintf(stderr, "%d %s\n",i,rc_seq.c_str());
+ }*/
+
+ for ( j = i + 1; j < size; j++ )
+ {
+ string cur_seq = * ( loops_edges[j].full_edge );
+
+ if ( seq.compare ( cur_seq ) == 0 )
+ {
+ fprintf ( stderr, "ERROR: two equal loop edge sequence from same node, this should not happen!\n" );
+ fprintf ( stderr, "%s\n", seq.c_str() );
+ exit ( -1 );
+ }
+
+ if ( rc_seq.compare ( cur_seq ) == 0 )
+ {
+ fprintf ( stderr, "INFO: two loop edge sequence are reversed complemental!\n" );
+ fprintf ( stderr, "%s\n", seq.c_str() );
+ fprintf ( stderr, "%s\n", rc_seq.c_str() );
+ need_output = 0;
+ loops_edges[j].cvg += loops_edges[i].cvg;
+ break;
+ }
+ }
+
+ if ( need_output )
+ {
+ output_1edge ( &loops_edges[i], K_size, fp );
+ //fprintf(stderr,"need output %d %s\n",i,seq.c_str());
+ }
+
+ delete ( loops_edges[i].full_edge );
+ need_output = 1;
+ }
+ }
+
+ return 0;
}
@@ -1092,72 +1140,80 @@ Output:
Return:
None.
*************************************************/
-static void stringBeads ( hashtable2 * ht, int K_size, list<stacked_node2 *> &stack, stacked_node2 * from_node, edge_node * from_edge, int * node_c )
+static void stringBeads ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack, stacked_node2 *from_node, edge_node *from_edge, int *node_c )
{
- static size_t call_times;
- call_times++;
- bucket2 * t_bucket = from_node->node;
- edge_node * t_edge = from_edge;
- stacked_node2 * t_stacked_node = from_node;
- int is_left = from_node->is_left;
- int t_smaller;
- t_edge->used = 1;
- t_bucket = lastKmer ( ht, K_size, t_bucket, t_edge, is_left, t_smaller );
-
- if ( !t_bucket )
- {
- fprintf ( stderr, "ERROR: to node not found in stringBeads()\n" );
- exit ( -1 );
- }
-
- while ( t_bucket && t_bucket->kmer_info.linear )
- {
- t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
- t_stacked_node->node = t_bucket;
- is_left = ! ( is_left ^ t_smaller );
- t_stacked_node->is_left = is_left;
-
- if ( is_left ) { t_stacked_node->edge = t_bucket->kmer_info.left; }
- else { t_stacked_node->edge = t_bucket->kmer_info.right; }
-
- t_stacked_node->next = NULL;
- ( ( stacked_node2 * ) stack.back() )->next = t_stacked_node;
- stack.push_back ( t_stacked_node );
- t_stacked_node->edge->used = 1;
- t_bucket = lastKmer ( ht, K_size, t_bucket, t_stacked_node->edge, is_left, t_smaller );
- }
-
- if ( t_bucket ) //should be always true for end node ..
- {
- t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
- t_stacked_node->node = t_bucket;
- is_left = ! ( is_left ^ t_smaller );
- t_stacked_node->is_left = is_left;
- t_stacked_node->edge = NULL;
- t_stacked_node->next = NULL;
- ( ( stacked_node2 * ) stack.back() )->next = t_stacked_node;
- stack.push_back ( t_stacked_node );
- }
+ static size_t call_times;
+ call_times++;
+ bucket2 *t_bucket = from_node->node;
+ edge_node *t_edge = from_edge;
+ stacked_node2 *t_stacked_node = from_node;
+ int is_left = from_node->is_left;
+ int t_smaller;
+ t_edge->used = 1;
+ t_bucket = lastKmer ( ht, K_size, t_bucket, t_edge, is_left, t_smaller );
+
+ if ( !t_bucket )
+ {
+ fprintf ( stderr, "ERROR: to node not found in stringBeads()\n" );
+ exit ( -1 );
+ }
+
+ while ( t_bucket && t_bucket->kmer_info.linear )
+ {
+ t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
+ t_stacked_node->node = t_bucket;
+ is_left = ! ( is_left ^ t_smaller );
+ t_stacked_node->is_left = is_left;
+
+ if ( is_left )
+ {
+ t_stacked_node->edge = t_bucket->kmer_info.left;
+ }
+ else
+ {
+ t_stacked_node->edge = t_bucket->kmer_info.right;
+ }
+
+ t_stacked_node->next = NULL;
+ ( ( stacked_node2 * ) stack.back() )->next = t_stacked_node;
+ stack.push_back ( t_stacked_node );
+ t_stacked_node->edge->used = 1;
+ t_bucket = lastKmer ( ht, K_size, t_bucket, t_stacked_node->edge, is_left, t_smaller );
+ }
+
+ if ( t_bucket ) //should be always true for end node ..
+ {
+ t_stacked_node = ( stacked_node2 * ) malloc ( sizeof ( stacked_node2 ) );
+ t_stacked_node->node = t_bucket;
+ is_left = ! ( is_left ^ t_smaller );
+ t_stacked_node->is_left = is_left;
+ t_stacked_node->edge = NULL;
+ t_stacked_node->next = NULL;
+ ( ( stacked_node2 * ) stack.back() )->next = t_stacked_node;
+ stack.push_back ( t_stacked_node );
+ }
}
//for debug
static void pirntStack ( list<stacked_node2 *> &stack )
{
- static int times = 0;
- fprintf ( stderr, "call times %d \n ", times++ );
- stacked_node2 * ptr = stack.front();
+ static int times = 0;
+ fprintf ( stderr, "call times %d \n ", times++ );
+ stacked_node2 *ptr = stack.front();
- while ( ptr )
- {
- printKmer ( & ( ptr->node->kmer_t2 ), stderr );
+ while ( ptr )
+ {
+ printKmer ( & ( ptr->node->kmer_t2 ), stderr );
- if ( ptr->edge )
- { fprintf ( stderr, "%llx , %d ,", ptr->edge->edge, ptr->is_left ); }
+ if ( ptr->edge )
+ {
+ fprintf ( stderr, "%llx , %d ,", ptr->edge->edge, ptr->is_left );
+ }
- fprintf ( stderr, "->" );
- ptr = ptr->next;
- }
+ fprintf ( stderr, "->" );
+ ptr = ptr->next;
+ }
- fprintf ( stderr, "\n" );
+ fprintf ( stderr, "\n" );
}
/*************************************************
Function:
@@ -1177,129 +1233,129 @@ Output:
Return:
None.
*************************************************/
-static void process_1stack ( hashtable2 * ht, int K_size, list<stacked_node2 *> &stack, FILE * fp, vector<preEDGE2> &loops_edges )
+static void process_1stack ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack, FILE *fp, vector<preEDGE2> &loops_edges )
{
- static size_t edge_c;// edge id
- static preEDGE2 long_edge_buf;
- preEDGE2 loops;
- int TipLenTh = 3 * K_size; //orig 100
- int TipCovTh = 5;
-
- if ( stack.size() < 2 )
- {
- fprintf ( stderr, "only %llu nodes in the stack \n", stack.size() );
- exit ( -1 );
- }
- else
- {
- //palindrome check
- string full_edge = stack2string ( ht, K_size, stack ); //when output skip the first kmer first
- stacked_node2 * test = stack.front();
- bool palindrome = check_palindrome ( full_edge );
- int bal_edge = !palindrome;
- stacked_node2 * from_node = stack.front();
- stacked_node2 * to_node = stack.back();
- long_edge_buf.from_node = from_node;
- long_edge_buf.to_node = to_node;
- long_edge_buf.full_edge = &full_edge;
- long_edge_buf.bal_edge = bal_edge;
- uint64_t symbol = 0; //cvg stat
- edge_c++;
-
- if ( stack.size() == 2 )
- {
- long_edge_buf.cvg = from_node->edge->edge_cov;
- }
- else
- {
- stacked_node2 * nd_tmp = from_node;
-
- while ( nd_tmp && nd_tmp->edge )
- {
- symbol += nd_tmp->edge->edge_cov * ( nd_tmp->edge->len + 1 );
- nd_tmp = nd_tmp->next;
- }
-
- int cvg = symbol / ( full_edge.size() - K_size );
- long_edge_buf.cvg = cvg;
- }
-
- int from_left, from_right, to_left, to_right;
- from_left = count_left_edge_num ( from_node->node );
- from_right = count_right_edge_num ( from_node->node );
- to_left = count_left_edge_num ( to_node->node );
- to_right = count_right_edge_num ( to_node->node );
-
- //tips control
-
- if ( ( ( from_left + from_right == 1 ) && ( to_left + to_right == 1 ) && ( full_edge.size() < TipLenTh ) )
- || ( ( ( from_left + from_right == 1 ) || ( to_left + to_right == 1 ) )
- && ( full_edge.size() < TipLenTh ) && long_edge_buf.cvg < TipCovTh ) ) //tips args
- {
- //if(full_edge.size()<TipLenTh && long_edge_buf.cvg<TipCovTh){//it's a tip or low cvg link
- static size_t tip_num;
- tip_num++;
- }
- else
- {
- //debug begin
- /*
- string bug_seq = *(long_edge_buf.full_edge);
- if(bug_seq.compare("AATTGGACGTGAGAGCAAATTGTATTGAGCATACAATTTGCTCTCACGTCCAATT") == 0) {
- fprintf(stderr,"%s\n",bug_seq.c_str());
- fprintf(stderr,"from %llx to %llx \n",long_edge_buf.from_node->node,long_edge_buf.to_node->node);
-
- }
-
- if(bug_seq.compare("AATTGGACGTGAGAGCAAATTGTATGCTCAATACAATTTGCTCTCACGTCCAATT") == 0) {
- fprintf(stderr,"%s\n",bug_seq.c_str());
- fprintf(stderr,"from %llx to %llx \n",long_edge_buf.from_node->node,long_edge_buf.to_node->node);
-
- }*/
-
- //debug end
- if ( long_edge_buf.from_node->node == long_edge_buf.to_node->node )
- {
- loops = long_edge_buf;
- loops.full_edge = new string ( * ( long_edge_buf.full_edge ) );
- loops_edges.push_back ( loops );
- }
- else
- {
- //output edge
- output_1edge ( &long_edge_buf, K_size, fp );
- }
- }
-
- edge_c += bal_edge;
- }
+ static size_t edge_c;// edge id
+ static preEDGE2 long_edge_buf;
+ preEDGE2 loops;
+ int TipLenTh = 3 * K_size; //orig 100
+ int TipCovTh = 5;
+
+ if ( stack.size() < 2 )
+ {
+ fprintf ( stderr, "only %llu nodes in the stack \n", stack.size() );
+ exit ( -1 );
+ }
+ else
+ {
+ //palindrome check
+ string full_edge = stack2string ( ht, K_size, stack ); //when output skip the first kmer first
+ stacked_node2 *test = stack.front();
+ bool palindrome = check_palindrome ( full_edge );
+ int bal_edge = !palindrome;
+ stacked_node2 *from_node = stack.front();
+ stacked_node2 *to_node = stack.back();
+ long_edge_buf.from_node = from_node;
+ long_edge_buf.to_node = to_node;
+ long_edge_buf.full_edge = &full_edge;
+ long_edge_buf.bal_edge = bal_edge;
+ uint64_t symbol = 0; //cvg stat
+ edge_c++;
+
+ if ( stack.size() == 2 )
+ {
+ long_edge_buf.cvg = from_node->edge->edge_cov;
+ }
+ else
+ {
+ stacked_node2 *nd_tmp = from_node;
+
+ while ( nd_tmp && nd_tmp->edge )
+ {
+ symbol += nd_tmp->edge->edge_cov * ( nd_tmp->edge->len + 1 );
+ nd_tmp = nd_tmp->next;
+ }
+
+ int cvg = symbol / ( full_edge.size() - K_size );
+ long_edge_buf.cvg = cvg;
+ }
+
+ int from_left, from_right, to_left, to_right;
+ from_left = count_left_edge_num ( from_node->node );
+ from_right = count_right_edge_num ( from_node->node );
+ to_left = count_left_edge_num ( to_node->node );
+ to_right = count_right_edge_num ( to_node->node );
+
+ //tips control
+
+ if ( ( ( from_left + from_right == 1 ) && ( to_left + to_right == 1 ) && ( full_edge.size() < TipLenTh ) )
+ || ( ( ( from_left + from_right == 1 ) || ( to_left + to_right == 1 ) )
+ && ( full_edge.size() < TipLenTh ) && long_edge_buf.cvg < TipCovTh ) ) //tips args
+ {
+ //if(full_edge.size()<TipLenTh && long_edge_buf.cvg<TipCovTh){//it's a tip or low cvg link
+ static size_t tip_num;
+ tip_num++;
+ }
+ else
+ {
+ //debug begin
+ /*
+ string bug_seq = *(long_edge_buf.full_edge);
+ if(bug_seq.compare("AATTGGACGTGAGAGCAAATTGTATTGAGCATACAATTTGCTCTCACGTCCAATT") == 0) {
+ fprintf(stderr,"%s\n",bug_seq.c_str());
+ fprintf(stderr,"from %llx to %llx \n",long_edge_buf.from_node->node,long_edge_buf.to_node->node);
+
+ }
+
+ if(bug_seq.compare("AATTGGACGTGAGAGCAAATTGTATGCTCAATACAATTTGCTCTCACGTCCAATT") == 0) {
+ fprintf(stderr,"%s\n",bug_seq.c_str());
+ fprintf(stderr,"from %llx to %llx \n",long_edge_buf.from_node->node,long_edge_buf.to_node->node);
+
+ }*/
+
+ //debug end
+ if ( long_edge_buf.from_node->node == long_edge_buf.to_node->node )
+ {
+ loops = long_edge_buf;
+ loops.full_edge = new string ( * ( long_edge_buf.full_edge ) );
+ loops_edges.push_back ( loops );
+ }
+ else
+ {
+ //output edge
+ output_1edge ( &long_edge_buf, K_size, fp );
+ }
+ }
+
+ edge_c += bal_edge;
+ }
}
// WARNING: the kmer atcg is different from soapdenovo's represent
-static void output_1edge ( preEDGE2 * long_edge, int K_size, FILE * fp )
+static void output_1edge ( preEDGE2 *long_edge, int K_size, FILE *fp )
{
- fprintf ( fp, ">length %d,", long_edge->full_edge->size() - K_size );
- const char * seq = long_edge->full_edge->c_str();
- //uint64_t from_kmer[2],to_kmer[2];
- kmer_t2 from_kmer, to_kmer;
- get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, 0, &from_kmer );
- get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, long_edge->full_edge->size() - K_size, &to_kmer );
- uint64_t * from, *to;
- from = from_kmer.kmer;
- to = to_kmer.kmer;
+ fprintf ( fp, ">length %d,", long_edge->full_edge->size() - K_size );
+ const char *seq = long_edge->full_edge->c_str();
+ //uint64_t from_kmer[2],to_kmer[2];
+ kmer_t2 from_kmer, to_kmer;
+ get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, 0, &from_kmer );
+ get_kmer_from_seq ( seq, long_edge->full_edge->size() , K_size, long_edge->full_edge->size() - K_size, &to_kmer );
+ uint64_t *from, *to;
+ from = from_kmer.kmer;
+ to = to_kmer.kmer;
#ifdef _63MER_
- fprintf ( fp, "%llx %llx,", from[0], from[1] );
- fprintf ( fp, "%llx %llx,", to[0], to[1] );
+ fprintf ( fp, "%llx %llx,", from[0], from[1] );
+ fprintf ( fp, "%llx %llx,", to[0], to[1] );
#endif
#ifdef _127MER_
- fprintf ( fp, "%llx %llx %llx %llx,", from[0], from[1], from[2], from[3] );
- fprintf ( fp, "%llx %llx %llx %llx,", to[0], to[1], to[2], to[3] );
+ fprintf ( fp, "%llx %llx %llx %llx,", from[0], from[1], from[2], from[3] );
+ fprintf ( fp, "%llx %llx %llx %llx,", to[0], to[1], to[2], to[3] );
#endif
- fprintf ( fp, "cvg %d,%d\n", long_edge->cvg, long_edge->bal_edge );
- fprintf ( fp, "%s", seq );
- fprintf ( fp, "\n" );
+ fprintf ( fp, "cvg %d,%d\n", long_edge->cvg, long_edge->bal_edge );
+ fprintf ( fp, "%s", seq );
+ fprintf ( fp, "\n" );
}
@@ -1317,98 +1373,98 @@ Output:
Return:
None.
*************************************************/
-static void dislink ( hashtable2 * ht, int K_size, stacked_node2 * from_node )
+static void dislink ( hashtable2 *ht, int K_size, stacked_node2 *from_node )
{
- from_node->edge->used = 1;
- int from_edge_len = from_node->edge->len;
- int smaller;
-
- if ( from_node->next )
- {
- stacked_node2 * from_next = from_node->next;
-
- if ( from_next->node == from_node->node && from_next->is_left != from_node->is_left )
- {
- return ;
- }
-
- if ( from_next->is_left ) //remove right edge
- {
- if ( from_next->node->kmer_info.linear )
- {
- bucket2 * node_tmp = lastKmer ( ht, K_size, from_next->node, from_next->node->kmer_info.right, 0, smaller );
-
- if ( node_tmp == from_node->node )
- {
- from_next->node->kmer_info.right->used = 1;
- }
- else
- {
- fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
- }
- }
- else
- {
- edge_node * edge_tmp = from_next->node->kmer_info.right;
-
- while ( edge_tmp )
- {
- bucket2 * node_tmp = lastKmer ( ht, K_size, from_next->node, edge_tmp, 0, smaller );
-
- if ( node_tmp == from_node->node && edge_tmp->len == from_edge_len ) //there may be two or more edges between two nodes
- {
- edge_tmp->used = 1;
- break;
- }
-
- edge_tmp = edge_tmp->nxt_edge;
- }
-
- if ( !edge_tmp )
- {
- fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
- }
- }
- }
- else // remove left edge
- {
- if ( from_next->node->kmer_info.linear )
- {
- bucket2 * node_tmp = lastKmer ( ht, K_size, from_next->node, from_next->node->kmer_info.left, 1, smaller );
-
- if ( node_tmp == from_node->node )
- {
- from_next->node->kmer_info.left ->used = 1;
- }
- else
- {
- fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
- }
- }
- else
- {
- edge_node * edge_tmp = from_next->node->kmer_info.left;
-
- while ( edge_tmp )
- {
- bucket2 * node_tmp = lastKmer ( ht, K_size, from_next->node, edge_tmp, 1, smaller );
-
- if ( node_tmp == from_node->node && edge_tmp->len == from_edge_len )
- {
- edge_tmp->used = 1;
- break;
- }
-
- edge_tmp = edge_tmp->nxt_edge;
- }
-
- if ( !edge_tmp )
- {
- fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
- }
- }
- }
- }
+ from_node->edge->used = 1;
+ int from_edge_len = from_node->edge->len;
+ int smaller;
+
+ if ( from_node->next )
+ {
+ stacked_node2 *from_next = from_node->next;
+
+ if ( from_next->node == from_node->node && from_next->is_left != from_node->is_left )
+ {
+ return ;
+ }
+
+ if ( from_next->is_left ) //remove right edge
+ {
+ if ( from_next->node->kmer_info.linear )
+ {
+ bucket2 *node_tmp = lastKmer ( ht, K_size, from_next->node, from_next->node->kmer_info.right, 0, smaller );
+
+ if ( node_tmp == from_node->node )
+ {
+ from_next->node->kmer_info.right->used = 1;
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
+ }
+ }
+ else
+ {
+ edge_node *edge_tmp = from_next->node->kmer_info.right;
+
+ while ( edge_tmp )
+ {
+ bucket2 *node_tmp = lastKmer ( ht, K_size, from_next->node, edge_tmp, 0, smaller );
+
+ if ( node_tmp == from_node->node && edge_tmp->len == from_edge_len ) //there may be two or more edges between two nodes
+ {
+ edge_tmp->used = 1;
+ break;
+ }
+
+ edge_tmp = edge_tmp->nxt_edge;
+ }
+
+ if ( !edge_tmp )
+ {
+ fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
+ }
+ }
+ }
+ else // remove left edge
+ {
+ if ( from_next->node->kmer_info.linear )
+ {
+ bucket2 *node_tmp = lastKmer ( ht, K_size, from_next->node, from_next->node->kmer_info.left, 1, smaller );
+
+ if ( node_tmp == from_node->node )
+ {
+ from_next->node->kmer_info.left ->used = 1;
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
+ }
+ }
+ else
+ {
+ edge_node *edge_tmp = from_next->node->kmer_info.left;
+
+ while ( edge_tmp )
+ {
+ bucket2 *node_tmp = lastKmer ( ht, K_size, from_next->node, edge_tmp, 1, smaller );
+
+ if ( node_tmp == from_node->node && edge_tmp->len == from_edge_len )
+ {
+ edge_tmp->used = 1;
+ break;
+ }
+
+ edge_tmp = edge_tmp->nxt_edge;
+ }
+
+ if ( !edge_tmp )
+ {
+ fprintf ( stderr, "ERROR: to node not found in dislink()\n" );
+ }
+ }
+ }
+ }
}
@@ -1426,111 +1482,129 @@ Output:
Return:
The compacted string, namely the edge sequence.
*************************************************/ //63 127 differ, fixed
-static string stack2string ( hashtable2 * ht, int K_size, list<stacked_node2 *> & stack )
+static string stack2string ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack )
{
- static size_t call_times;
- call_times++;
- string full_edge;
- stacked_node2 * t_stack_node = stack.front();
- char tmp[1024];
- uint64_t bits[2];
- kmer_t2 tmp_kmer = ( t_stack_node->node->kmer_t2 );
-
- if ( t_stack_node->is_left )
- {
- reverseCompKmer ( &tmp_kmer, K_size );
- }
- else
- {
- }
-
- bitsarr2str ( tmp_kmer.kmer, K_size, tmp, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
- full_edge.append ( tmp ); //put first node
-
- while ( t_stack_node )
- {
- if ( t_stack_node->edge )
- {
- if ( t_stack_node->is_left )
- {
- bits[0] = get_rev_comp_seq ( t_stack_node->edge->edge, t_stack_node->edge->len + 1 );
- bitsarr2str ( bits, t_stack_node->edge->len + 1, tmp, 1 );
- full_edge.append ( tmp );
- }
- else
- {
- bits[0] = t_stack_node->edge->edge;
- bitsarr2str ( bits, t_stack_node->edge->len + 1, tmp, 1 );
- full_edge.append ( tmp );
- }
- }
-
- t_stack_node = t_stack_node->next;
- }
-
- return full_edge;
+ static size_t call_times;
+ call_times++;
+ string full_edge;
+ stacked_node2 *t_stack_node = stack.front();
+ char tmp[1024];
+ uint64_t bits[2];
+ kmer_t2 tmp_kmer = ( t_stack_node->node->kmer_t2 );
+
+ if ( t_stack_node->is_left )
+ {
+ reverseCompKmer ( &tmp_kmer, K_size );
+ }
+ else
+ {
+ }
+
+ bitsarr2str ( tmp_kmer.kmer, K_size, tmp, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
+ full_edge.append ( tmp ); //put first node
+
+ while ( t_stack_node )
+ {
+ if ( t_stack_node->edge )
+ {
+ if ( t_stack_node->is_left )
+ {
+ bits[0] = get_rev_comp_seq ( t_stack_node->edge->edge, t_stack_node->edge->len + 1 );
+ bitsarr2str ( bits, t_stack_node->edge->len + 1, tmp, 1 );
+ full_edge.append ( tmp );
+ }
+ else
+ {
+ bits[0] = t_stack_node->edge->edge;
+ bitsarr2str ( bits, t_stack_node->edge->len + 1, tmp, 1 );
+ full_edge.append ( tmp );
+ }
+ }
+
+ t_stack_node = t_stack_node->next;
+ }
+
+ return full_edge;
}
-static bool check_palindrome ( string & str ) //63 127 same
+static bool check_palindrome ( string &str ) //63 127 same
{
- size_t size = str.size();
- size_t mid = ( size / 2 ) + 1;
+ size_t size = str.size();
+ size_t mid = ( size / 2 ) + 1;
- for ( size_t i = 0; i < mid; ++i )
- {
- switch ( str[i] )
- {
- case 'A':
+ for ( size_t i = 0; i < mid; ++i )
+ {
+ switch ( str[i] )
+ {
+ case 'A':
- if ( str[size - i - 1] != 'T' ) { return 0; }
+ if ( str[size - i - 1] != 'T' )
+ {
+ return 0;
+ }
- break;
- case 'C':
+ break;
- if ( str[size - i - 1] != 'G' ) { return 0; }
+ case 'C':
- break;
- case 'T':
+ if ( str[size - i - 1] != 'G' )
+ {
+ return 0;
+ }
- if ( str[size - i - 1] != 'A' ) { return 0; }
+ break;
- break;
- case 'G':
+ case 'T':
- if ( str[size - i - 1] != 'C' ) { return 0; }
+ if ( str[size - i - 1] != 'A' )
+ {
+ return 0;
+ }
- break;
- }
- }
+ break;
- return 1;
+ case 'G':
+
+ if ( str[size - i - 1] != 'C' )
+ {
+ return 0;
+ }
+
+ break;
+ }
+ }
+
+ return 1;
}
-static string revCompSeq ( const string & str )
+static string revCompSeq ( const string &str )
{
- string rc_seq;
- size_t size = str.size();
-
- for ( int i = size - 1; i >= 0; i-- )
- {
- switch ( str[i] )
- {
- case 'A':
- rc_seq.push_back ( 'T' );
- break;
- case 'C':
- rc_seq.push_back ( 'G' );
- break;
- case 'T':
- rc_seq.push_back ( 'A' );
- break;
- case 'G':
- rc_seq.push_back ( 'C' );
- break;
- }
- }
-
- return rc_seq;
+ string rc_seq;
+ size_t size = str.size();
+
+ for ( int i = size - 1; i >= 0; i-- )
+ {
+ switch ( str[i] )
+ {
+ case 'A':
+ rc_seq.push_back ( 'T' );
+ break;
+
+ case 'C':
+ rc_seq.push_back ( 'G' );
+ break;
+
+ case 'T':
+ rc_seq.push_back ( 'A' );
+ break;
+
+ case 'G':
+ rc_seq.push_back ( 'C' );
+ break;
+ }
+ }
+
+ return rc_seq;
}
diff --git a/sparsePregraph/build_graph.cpp b/sparsePregraph/build_graph.cpp
index be011bf..3b4f7aa 100644
--- a/sparsePregraph/build_graph.cpp
+++ b/sparsePregraph/build_graph.cpp
@@ -16,12 +16,12 @@
#include "io_func.h"
#include "build_graph.h"
-static void process_round1_threaded ( struct read_t * read, struct hashtable2 * ht, pthread_spinlock_t * locks, size_t * bucket_count, int K_size, int gap );
+static void process_round1_threaded ( struct read_t *read, struct hashtable2 *ht, pthread_spinlock_t *locks, size_t *bucket_count, int K_size, int gap );
//static void process_round2_threaded(struct read_t *read,struct hashtable2 *ht,pthread_spinlock_t *locks,size_t *edge_cnt,int K_size,int gap);
//for debug
//static void process_round1_threaded_d(struct read_t *read, struct hashtable2 *ht,pthread_spinlock_t *locks,size_t *bucket_count,int K_size,int gap);
-static void process_round2_threaded_d ( struct read_t * read, struct hashtable2 * ht, pthread_spinlock_t * locks, size_t * edge_cnt, int K_size, int gap );
+static void process_round2_threaded_d ( struct read_t *read, struct hashtable2 *ht, pthread_spinlock_t *locks, size_t *edge_cnt, int K_size, int gap );
/*************************************************
@@ -45,36 +45,40 @@ Output:
Return:
None.
*************************************************/
-void run_process_threaded ( struct hashtable2 * ht, pthread_spinlock_t * locks, int K_size, int gap, size_t read_num, int thrd_num_s, int thrd_id, int round )
+void run_process_threaded ( struct hashtable2 *ht, pthread_spinlock_t *locks, int K_size, int gap, size_t read_num, int thrd_num_s, int thrd_id, int round )
{
- read_t read_tmp;
-
- for ( int i = thrd_id; i < read_num; i += thrd_num_s )
- {
- int bad_flag = 0;
- filter_N ( seq_t[i], bad_flag );
-
- if ( bad_flag ) {seq_t[i].clear(); continue;}
-
- Init_Read ( seq_t[i], read_tmp );
-
- if ( round == 1 )
- {
- //cout << "round 1:"<< seq_t[i] <<endl;
- process_round1_threaded ( &read_tmp, ht, locks, &bucket_count_total[thrd_id], K_size, gap );
- }
- else if ( round == 2 )
- {
- //cout << "round 2:"<< seq_t[i] <<endl;
- //process_round1_threaded_d(&read_tmp,ht,locks,&bucket_count_total[thrd_id],K_size,gap);
- process_round2_threaded_d ( &read_tmp, ht, locks, &edge_cnt_total[thrd_id], K_size, gap );
- }
- else
- {
- fprintf ( stderr, "ERROR: invalid round number!\n" );
- exit ( -1 );
- }
- }
+ read_t read_tmp;
+
+ for ( int i = thrd_id; i < read_num; i += thrd_num_s )
+ {
+ int bad_flag = 0;
+ filter_N ( seq_t[i], bad_flag );
+
+ if ( bad_flag )
+ {
+ seq_t[i].clear();
+ continue;
+ }
+
+ Init_Read ( seq_t[i], read_tmp );
+
+ if ( round == 1 )
+ {
+ //cout << "round 1:"<< seq_t[i] <<endl;
+ process_round1_threaded ( &read_tmp, ht, locks, &bucket_count_total[thrd_id], K_size, gap );
+ }
+ else if ( round == 2 )
+ {
+ //cout << "round 2:"<< seq_t[i] <<endl;
+ //process_round1_threaded_d(&read_tmp,ht,locks,&bucket_count_total[thrd_id],K_size,gap);
+ process_round2_threaded_d ( &read_tmp, ht, locks, &edge_cnt_total[thrd_id], K_size, gap );
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: invalid round number!\n" );
+ exit ( -1 );
+ }
+ }
}
@@ -97,154 +101,160 @@ Output:
Return:
None.
*************************************************/
-static void process_round1_threaded ( struct read_t * read, struct hashtable2 * ht, pthread_spinlock_t * locks, size_t * bucket_count, int K_size, int gap )
+static void process_round1_threaded ( struct read_t *read, struct hashtable2 *ht, pthread_spinlock_t *locks, size_t *bucket_count, int K_size, int gap )
{
- int readLen = read->readLen;
- int OverlappingKmers = readLen - K_size + 1;
+ int readLen = read->readLen;
+ int OverlappingKmers = readLen - K_size + 1;
- if ( gap >= OverlappingKmers )
- { return;}
+ if ( gap >= OverlappingKmers )
+ {
+ return;
+ }
- int Read_arr_sz = readLen / 32 + 1;
- int rem = readLen % 32;
+ int Read_arr_sz = readLen / 32 + 1;
+ int rem = readLen % 32;
- if ( rem == 0 )
- {Read_arr_sz--;}
+ if ( rem == 0 )
+ {
+ Read_arr_sz--;
+ }
#ifdef _63MER_
- int Kmer_arr_sz = 2;
- int tot_bits = Read_arr_sz * 64;
+ int Kmer_arr_sz = 2;
+ int tot_bits = Read_arr_sz * 64;
#endif
#ifdef _127MER_
- int Kmer_arr_sz = 4;
- int tot_bits = Read_arr_sz * 128;
+ int Kmer_arr_sz = 4;
+ int tot_bits = Read_arr_sz * 128;
#endif
- size_t ht_sz = ht->ht_sz;
- bool flip[500], found[500];
- size_t hash_idx[500];
- memset ( flip, 0, sizeof ( flip ) );
- memset ( found, 0, sizeof ( found ) );
- kmer_t2 seq[500], f_seq[500];
- memset ( seq, 0, sizeof ( seq ) );
- uint64_t hv[500], temp_bits[500];
- bucket2 ** bktptr[500];
- char c_str[500];
-
- for ( int j = 0; j < OverlappingKmers; j++ )
- {
- get_sub_arr ( read->read_bits, read->readLen, j, K_size, seq[j].kmer );
+ size_t ht_sz = ht->ht_sz;
+ bool flip[500], found[500];
+ size_t hash_idx[500];
+ memset ( flip, 0, sizeof ( flip ) );
+ memset ( found, 0, sizeof ( found ) );
+ kmer_t2 seq[500], f_seq[500];
+ memset ( seq, 0, sizeof ( seq ) );
+ uint64_t hv[500], temp_bits[500];
+ bucket2 **bktptr[500];
+ char c_str[500];
+
+ for ( int j = 0; j < OverlappingKmers; j++ )
+ {
+ get_sub_arr ( read->read_bits, read->readLen, j, K_size, seq[j].kmer );
#ifdef _63MER_
- if ( K_size <= 31 ) //fix the represent bug
- {
- ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
+ if ( K_size <= 31 ) //fix the represent bug
+ {
+ ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
#endif
#ifdef _127MER_ //fix the represent bug
- if ( K_size <= 31 ) //fix the represent bug
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
- else if ( K_size <= 63 )
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [1];
- ( seq[j].kmer ) [2] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [1] = 0;
- ( seq[j].kmer ) [0] = 0;
- }
- else if ( K_size <= 95 )
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [2];
- ( seq[j].kmer ) [2] = ( seq[j].kmer ) [1];
- ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
+ if ( K_size <= 31 ) //fix the represent bug
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
+ else if ( K_size <= 63 )
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [1];
+ ( seq[j].kmer ) [2] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [1] = 0;
+ ( seq[j].kmer ) [0] = 0;
+ }
+ else if ( K_size <= 95 )
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [2];
+ ( seq[j].kmer ) [2] = ( seq[j].kmer ) [1];
+ ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
#endif
- memcpy ( &f_seq[j], &seq[j], Kmer_arr_sz * sizeof ( uint64_t ) );
- get_rev_comp_seq_arr ( ( f_seq[j].kmer ), K_size, Kmer_arr_sz ); //TODO ,add 127mer support
-
- if ( uint64_t_cmp ( seq[j].kmer, f_seq[j].kmer, Kmer_arr_sz ) > 0 )
- {
- flip[j] = 1;
- }
-
- if ( flip[j] == 1 )
- {
- memcpy ( temp_bits, & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
- memcpy ( & ( seq[j].kmer ), & ( f_seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
- memcpy ( & ( f_seq[j].kmer ), temp_bits, Kmer_arr_sz * sizeof ( uint64_t ) );
- }
-
- hv[j] = MurmurHash64A ( ( seq[j].kmer ), sizeof ( seq[j] ), 0 );
- hash_idx[j] = ( size_t ) ( hv[j] % ht_sz );
- bktptr[j] = & ( ht->store_pos[hash_idx[j]] );
- }
-
- int g, h;
- g = 0;
-
- for ( int k = 0; k < gap; ++k )
- {
- pthread_spin_lock ( &locks[hash_idx[k]] );
- found[k] = look_up_in_a_list2_r1 ( &seq[k], ( struct bucket2_r1 ** * ) &bktptr[k] );
- pthread_spin_unlock ( &locks[hash_idx[k]] );
-
- if ( found[k] == 1 )
- {
- g = k;
- break;
- }
- }
-
- for ( int j = g; j < OverlappingKmers; )
- {
- h = gap;
-
- for ( int k = 0; k < gap; ++k )
- {
- if ( ( j + k ) >= OverlappingKmers - 1 )
- {
- h = k + 1; //���������һ��kmer
- break;
- }
-
- pthread_spin_lock ( &locks[hash_idx[j + k]] );
- found[j + k] = look_up_in_a_list2_r1 ( &seq[j + k], ( bucket2_r1 ** * ) &bktptr[j + k] ); //lock...
- pthread_spin_unlock ( &locks[hash_idx[j + k]] );
-
- if ( k > 0 && found[j + k] == 1 )
- {
- h = k;
- break;
- }
- }
-
- pthread_spin_lock ( &locks[hash_idx[j]] );
- found[j] = look_up_in_a_list2_r1 ( &seq[j], ( bucket2_r1 ** * ) &bktptr[j] ); //lock...
- pthread_spin_unlock ( &locks[hash_idx[j]] );
-
- if ( found[j] == 0 )
- {
- pthread_spin_lock ( &locks[hash_idx[j]] );
- * ( bktptr[j] ) = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2_r1 ) ); //lock ...
- memset ( * ( bktptr[j] ), 0, sizeof ( struct bucket2_r1 ) );
- memcpy ( & ( ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_t2.kmer ), & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
- ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_info.cov1 = 0;
- // the cvg is useless in round 1
- pthread_spin_unlock ( &locks[hash_idx[j]] );
- ( *bucket_count ) ++;
- }
-
- j = j + h;
-
- if ( j >= OverlappingKmers )
- {break;}
- }
+ memcpy ( &f_seq[j], &seq[j], Kmer_arr_sz * sizeof ( uint64_t ) );
+ get_rev_comp_seq_arr ( ( f_seq[j].kmer ), K_size, Kmer_arr_sz ); //TODO ,add 127mer support
+
+ if ( uint64_t_cmp ( seq[j].kmer, f_seq[j].kmer, Kmer_arr_sz ) > 0 )
+ {
+ flip[j] = 1;
+ }
+
+ if ( flip[j] == 1 )
+ {
+ memcpy ( temp_bits, & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
+ memcpy ( & ( seq[j].kmer ), & ( f_seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
+ memcpy ( & ( f_seq[j].kmer ), temp_bits, Kmer_arr_sz * sizeof ( uint64_t ) );
+ }
+
+ hv[j] = MurmurHash64A ( ( seq[j].kmer ), sizeof ( seq[j] ), 0 );
+ hash_idx[j] = ( size_t ) ( hv[j] % ht_sz );
+ bktptr[j] = & ( ht->store_pos[hash_idx[j]] );
+ }
+
+ int g, h;
+ g = 0;
+
+ for ( int k = 0; k < gap; ++k )
+ {
+ pthread_spin_lock ( &locks[hash_idx[k]] );
+ found[k] = look_up_in_a_list2_r1 ( &seq[k], ( struct bucket2_r1 ** * ) &bktptr[k] );
+ pthread_spin_unlock ( &locks[hash_idx[k]] );
+
+ if ( found[k] == 1 )
+ {
+ g = k;
+ break;
+ }
+ }
+
+ for ( int j = g; j < OverlappingKmers; )
+ {
+ h = gap;
+
+ for ( int k = 0; k < gap; ++k )
+ {
+ if ( ( j + k ) >= OverlappingKmers - 1 )
+ {
+ h = k + 1; //���������һ��kmer
+ break;
+ }
+
+ pthread_spin_lock ( &locks[hash_idx[j + k]] );
+ found[j + k] = look_up_in_a_list2_r1 ( &seq[j + k], ( bucket2_r1 ** * ) &bktptr[j + k] ); //lock...
+ pthread_spin_unlock ( &locks[hash_idx[j + k]] );
+
+ if ( k > 0 && found[j + k] == 1 )
+ {
+ h = k;
+ break;
+ }
+ }
+
+ pthread_spin_lock ( &locks[hash_idx[j]] );
+ found[j] = look_up_in_a_list2_r1 ( &seq[j], ( bucket2_r1 ** * ) &bktptr[j] ); //lock...
+ pthread_spin_unlock ( &locks[hash_idx[j]] );
+
+ if ( found[j] == 0 )
+ {
+ pthread_spin_lock ( &locks[hash_idx[j]] );
+ * ( bktptr[j] ) = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2_r1 ) ); //lock ...
+ memset ( * ( bktptr[j] ), 0, sizeof ( struct bucket2_r1 ) );
+ memcpy ( & ( ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_t2.kmer ), & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
+ ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_info.cov1 = 0;
+ // the cvg is useless in round 1
+ pthread_spin_unlock ( &locks[hash_idx[j]] );
+ ( *bucket_count ) ++;
+ }
+
+ j = j + h;
+
+ if ( j >= OverlappingKmers )
+ {
+ break;
+ }
+ }
}
@@ -268,250 +278,262 @@ Output:
Return:
None.
*************************************************/
-static void process_round2_threaded_d ( struct read_t * read, struct hashtable2 * ht, pthread_spinlock_t * locks, size_t * edge_cnt, int K_size, int gap )
+static void process_round2_threaded_d ( struct read_t *read, struct hashtable2 *ht, pthread_spinlock_t *locks, size_t *edge_cnt, int K_size, int gap )
{
- static size_t i;
- int readLen = read->readLen;
- int OverlappingKmers = readLen - K_size + 1;
+ static size_t i;
+ int readLen = read->readLen;
+ int OverlappingKmers = readLen - K_size + 1;
- if ( gap >= OverlappingKmers )
- { return;}
+ if ( gap >= OverlappingKmers )
+ {
+ return;
+ }
- int Read_arr_sz = readLen / 32 + 1;
- int rem = readLen % 32;
+ int Read_arr_sz = readLen / 32 + 1;
+ int rem = readLen % 32;
- if ( rem == 0 )
- {Read_arr_sz--;}
+ if ( rem == 0 )
+ {
+ Read_arr_sz--;
+ }
#ifdef _63MER_
- int Kmer_arr_sz = 2;
- int tot_bits = Read_arr_sz * 64;
+ int Kmer_arr_sz = 2;
+ int tot_bits = Read_arr_sz * 64;
#endif
#ifdef _127MER_
- int Kmer_arr_sz = 4;
- int tot_bits = Read_arr_sz * 128;
+ int Kmer_arr_sz = 4;
+ int tot_bits = Read_arr_sz * 128;
#endif
- size_t ht_sz = ht->ht_sz;
- bool flip[500], found[500];
- size_t hash_idx[500];
- memset ( flip, 0, sizeof ( flip ) );
- memset ( found, 0, sizeof ( found ) );
- kmer_t2 seq[500], f_seq[500];
- uint64_t hv[500], temp_bits[500];
- bucket2 ** bktptr[500];
- char c_str[500];
-
- for ( int j = 0; j < OverlappingKmers; j++ )
- {
- get_sub_arr ( read->read_bits, read->readLen, j, K_size, seq[j].kmer );
+ size_t ht_sz = ht->ht_sz;
+ bool flip[500], found[500];
+ size_t hash_idx[500];
+ memset ( flip, 0, sizeof ( flip ) );
+ memset ( found, 0, sizeof ( found ) );
+ kmer_t2 seq[500], f_seq[500];
+ uint64_t hv[500], temp_bits[500];
+ bucket2 **bktptr[500];
+ char c_str[500];
+
+ for ( int j = 0; j < OverlappingKmers; j++ )
+ {
+ get_sub_arr ( read->read_bits, read->readLen, j, K_size, seq[j].kmer );
#ifdef _63MER_
- if ( K_size <= 31 ) //fix the represent bug
- {
- ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
+ if ( K_size <= 31 ) //fix the represent bug
+ {
+ ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
#endif
#ifdef _127MER_ //fix the represent bug
- if ( K_size <= 31 ) //fix the represent bug
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
- else if ( K_size <= 63 )
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [1];
- ( seq[j].kmer ) [2] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [1] = 0;
- ( seq[j].kmer ) [0] = 0;
- }
- else if ( K_size <= 95 )
- {
- ( seq[j].kmer ) [3] = ( seq[j].kmer ) [2];
- ( seq[j].kmer ) [2] = ( seq[j].kmer ) [1];
- ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
- ( seq[j].kmer ) [0] = 0;
- }
+ if ( K_size <= 31 ) //fix the represent bug
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
+ else if ( K_size <= 63 )
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [1];
+ ( seq[j].kmer ) [2] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [1] = 0;
+ ( seq[j].kmer ) [0] = 0;
+ }
+ else if ( K_size <= 95 )
+ {
+ ( seq[j].kmer ) [3] = ( seq[j].kmer ) [2];
+ ( seq[j].kmer ) [2] = ( seq[j].kmer ) [1];
+ ( seq[j].kmer ) [1] = ( seq[j].kmer ) [0];
+ ( seq[j].kmer ) [0] = 0;
+ }
#endif
- memcpy ( &f_seq[j], &seq[j], Kmer_arr_sz * sizeof ( uint64_t ) );
- get_rev_comp_seq_arr ( ( f_seq[j].kmer ), K_size, Kmer_arr_sz );
-
- if ( uint64_t_cmp ( seq[j].kmer, f_seq[j].kmer, Kmer_arr_sz ) > 0 )
- {
- flip[j] = 1;
- }
-
- if ( flip[j] == 1 )
- {
- memcpy ( temp_bits, & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
- memcpy ( & ( seq[j].kmer ), & ( f_seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
- memcpy ( & ( f_seq[j].kmer ), temp_bits, Kmer_arr_sz * sizeof ( uint64_t ) );
- }
-
- hv[j] = MurmurHash64A ( ( seq[j].kmer ), sizeof ( seq[j] ), 0 );
- hash_idx[j] = ( size_t ) ( hv[j] % ht_sz );
- bktptr[j] = & ( ht->store_pos[hash_idx[j]] );
- found[j] = look_up_in_a_list2 ( &seq[j], &bktptr[j] );
- }
-
- int last_found = -1;
- int cur_found = -1;
- int h = -1;
-
- for ( int i = 0; i < OverlappingKmers; ++i )
- {
- if ( found[i] )
- {
- pthread_spin_lock ( &locks[hash_idx[i]] );
-
- if ( ( * ( bktptr[i] ) )->kmer_info.cov1 < 0xffff )
- {
- ( * ( bktptr[i] ) )->kmer_info.cov1++;
- }
-
- pthread_spin_unlock ( &locks[hash_idx[i]] );
- cur_found = i;
-
- if ( last_found != -1 )
- {
- if ( cur_found - last_found > gap )
- {
- fprintf ( stderr, "ERROR: cur_found - last_found > gap !\n" );
- exit ( -1 );
- }
-
- //add edge ...
- h = cur_found - last_found;
- uint64_t left_bits;
- get_sub_arr ( read->read_bits, read->readLen, last_found, h, &left_bits );
- pthread_spin_lock ( &locks[hash_idx[cur_found]] ); //lock for cur_found node to add left edge
-
- if ( flip[cur_found] == 0 )
- {
- struct edge_node ** edge_node_p2p = & ( ( * ( bktptr[cur_found] ) )->kmer_info.left );
-
- while ( ( *edge_node_p2p ) != NULL )
- {
- if ( ( *edge_node_p2p )->edge == ( uint64_t ) left_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
- {
- if ( ( *edge_node_p2p )->edge_cov < 0x7f )
- { ( *edge_node_p2p )->edge_cov++;}
-
- break;
- }
-
- edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
- }
-
- if ( ( *edge_node_p2p ) == NULL )
- {
- ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- ( *edge_cnt ) ++;
- memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
- ( *edge_node_p2p )->edge = ( uint64_t ) left_bits;
- ( *edge_node_p2p )->edge_cov = 1;
- ( *edge_node_p2p )->len = h - 1;
- }
- }
- else
- {
- left_bits = get_rev_comp_seq ( left_bits, h );
- struct edge_node ** edge_node_p2p = & ( ( * ( bktptr[cur_found] ) )->kmer_info.right );
-
- while ( ( *edge_node_p2p ) != NULL )
- {
- if ( ( *edge_node_p2p )->edge == ( uint64_t ) left_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
- {
- if ( ( *edge_node_p2p )->edge_cov < 0x7f )
- { ( *edge_node_p2p )->edge_cov++;}
-
- break;
- }
-
- edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
- }
-
- if ( ( *edge_node_p2p ) == NULL )
- {
- ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- ( *edge_cnt ) ++;
- memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
- ( *edge_node_p2p )->edge = ( uint64_t ) left_bits;
- ( *edge_node_p2p )->edge_cov = 1;
- ( *edge_node_p2p )->len = h - 1;
- }
- }
-
- pthread_spin_unlock ( &locks[hash_idx[cur_found]] );
- uint64_t right_bits;
- get_sub_arr ( read->read_bits, read->readLen, last_found + K_size, h, &right_bits );
- pthread_spin_lock ( &locks[hash_idx[last_found]] ); //lock ...
-
- if ( flip[last_found] == 1 )
- {
- right_bits = get_rev_comp_seq ( right_bits, h );
- struct edge_node ** edge_node_p2p = & ( ( * ( bktptr[last_found] ) )->kmer_info.left );
-
- while ( ( *edge_node_p2p ) != NULL )
- {
- if ( ( *edge_node_p2p )->edge == ( uint64_t ) right_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
- {
- if ( ( *edge_node_p2p )->edge_cov < 0x7f )
- { ( *edge_node_p2p )->edge_cov++;}
-
- break;
- }
-
- edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
- }
-
- if ( ( *edge_node_p2p ) == NULL )
- {
- ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- ( *edge_cnt ) ++;
- memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
- ( *edge_node_p2p )->edge = ( uint64_t ) right_bits;
- ( *edge_node_p2p )->edge_cov = 1;
- ( *edge_node_p2p )->len = h - 1;
- }
- }
- else
- {
- struct edge_node ** edge_node_p2p = & ( ( * ( bktptr[last_found] ) )->kmer_info.right );
-
- while ( ( *edge_node_p2p ) != NULL )
- {
- if ( ( *edge_node_p2p )->edge == ( uint64_t ) right_bits && ( ( *edge_node_p2p )->len + 1 == h ) )
- {
- if ( ( *edge_node_p2p )->edge_cov < 0x7f )
- { ( *edge_node_p2p )->edge_cov++;}
-
- break;
- }
-
- edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
- }
-
- if ( ( *edge_node_p2p ) == NULL )
- {
- ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- ( *edge_cnt ) ++;
- memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
- ( *edge_node_p2p )->edge = ( uint64_t ) right_bits;
- ( *edge_node_p2p )->edge_cov = 1;
- ( *edge_node_p2p )->len = h - 1;
- }
- }
-
- pthread_spin_unlock ( &locks[hash_idx[last_found]] ); //lock ...
- }
-
- last_found = cur_found;
- }
- }
+ memcpy ( &f_seq[j], &seq[j], Kmer_arr_sz * sizeof ( uint64_t ) );
+ get_rev_comp_seq_arr ( ( f_seq[j].kmer ), K_size, Kmer_arr_sz );
+
+ if ( uint64_t_cmp ( seq[j].kmer, f_seq[j].kmer, Kmer_arr_sz ) > 0 )
+ {
+ flip[j] = 1;
+ }
+
+ if ( flip[j] == 1 )
+ {
+ memcpy ( temp_bits, & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
+ memcpy ( & ( seq[j].kmer ), & ( f_seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
+ memcpy ( & ( f_seq[j].kmer ), temp_bits, Kmer_arr_sz * sizeof ( uint64_t ) );
+ }
+
+ hv[j] = MurmurHash64A ( ( seq[j].kmer ), sizeof ( seq[j] ), 0 );
+ hash_idx[j] = ( size_t ) ( hv[j] % ht_sz );
+ bktptr[j] = & ( ht->store_pos[hash_idx[j]] );
+ found[j] = look_up_in_a_list2 ( &seq[j], &bktptr[j] );
+ }
+
+ int last_found = -1;
+ int cur_found = -1;
+ int h = -1;
+
+ for ( int i = 0; i < OverlappingKmers; ++i )
+ {
+ if ( found[i] )
+ {
+ pthread_spin_lock ( &locks[hash_idx[i]] );
+
+ if ( ( * ( bktptr[i] ) )->kmer_info.cov1 < 0xffff )
+ {
+ ( * ( bktptr[i] ) )->kmer_info.cov1++;
+ }
+
+ pthread_spin_unlock ( &locks[hash_idx[i]] );
+ cur_found = i;
+
+ if ( last_found != -1 )
+ {
+ if ( cur_found - last_found > gap )
+ {
+ fprintf ( stderr, "ERROR: cur_found - last_found > gap !\n" );
+ exit ( -1 );
+ }
+
+ //add edge ...
+ h = cur_found - last_found;
+ uint64_t left_bits;
+ get_sub_arr ( read->read_bits, read->readLen, last_found, h, &left_bits );
+ pthread_spin_lock ( &locks[hash_idx[cur_found]] ); //lock for cur_found node to add left edge
+
+ if ( flip[cur_found] == 0 )
+ {
+ struct edge_node **edge_node_p2p = & ( ( * ( bktptr[cur_found] ) )->kmer_info.left );
+
+ while ( ( *edge_node_p2p ) != NULL )
+ {
+ if ( ( *edge_node_p2p )->edge == ( uint64_t ) left_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
+ {
+ if ( ( *edge_node_p2p )->edge_cov < 0x7f )
+ {
+ ( *edge_node_p2p )->edge_cov++;
+ }
+
+ break;
+ }
+
+ edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
+ }
+
+ if ( ( *edge_node_p2p ) == NULL )
+ {
+ ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ ( *edge_cnt ) ++;
+ memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
+ ( *edge_node_p2p )->edge = ( uint64_t ) left_bits;
+ ( *edge_node_p2p )->edge_cov = 1;
+ ( *edge_node_p2p )->len = h - 1;
+ }
+ }
+ else
+ {
+ left_bits = get_rev_comp_seq ( left_bits, h );
+ struct edge_node **edge_node_p2p = & ( ( * ( bktptr[cur_found] ) )->kmer_info.right );
+
+ while ( ( *edge_node_p2p ) != NULL )
+ {
+ if ( ( *edge_node_p2p )->edge == ( uint64_t ) left_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
+ {
+ if ( ( *edge_node_p2p )->edge_cov < 0x7f )
+ {
+ ( *edge_node_p2p )->edge_cov++;
+ }
+
+ break;
+ }
+
+ edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
+ }
+
+ if ( ( *edge_node_p2p ) == NULL )
+ {
+ ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ ( *edge_cnt ) ++;
+ memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
+ ( *edge_node_p2p )->edge = ( uint64_t ) left_bits;
+ ( *edge_node_p2p )->edge_cov = 1;
+ ( *edge_node_p2p )->len = h - 1;
+ }
+ }
+
+ pthread_spin_unlock ( &locks[hash_idx[cur_found]] );
+ uint64_t right_bits;
+ get_sub_arr ( read->read_bits, read->readLen, last_found + K_size, h, &right_bits );
+ pthread_spin_lock ( &locks[hash_idx[last_found]] ); //lock ...
+
+ if ( flip[last_found] == 1 )
+ {
+ right_bits = get_rev_comp_seq ( right_bits, h );
+ struct edge_node **edge_node_p2p = & ( ( * ( bktptr[last_found] ) )->kmer_info.left );
+
+ while ( ( *edge_node_p2p ) != NULL )
+ {
+ if ( ( *edge_node_p2p )->edge == ( uint64_t ) right_bits && ( ( *edge_node_p2p )->len + 1 ) == h )
+ {
+ if ( ( *edge_node_p2p )->edge_cov < 0x7f )
+ {
+ ( *edge_node_p2p )->edge_cov++;
+ }
+
+ break;
+ }
+
+ edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
+ }
+
+ if ( ( *edge_node_p2p ) == NULL )
+ {
+ ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ ( *edge_cnt ) ++;
+ memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
+ ( *edge_node_p2p )->edge = ( uint64_t ) right_bits;
+ ( *edge_node_p2p )->edge_cov = 1;
+ ( *edge_node_p2p )->len = h - 1;
+ }
+ }
+ else
+ {
+ struct edge_node **edge_node_p2p = & ( ( * ( bktptr[last_found] ) )->kmer_info.right );
+
+ while ( ( *edge_node_p2p ) != NULL )
+ {
+ if ( ( *edge_node_p2p )->edge == ( uint64_t ) right_bits && ( ( *edge_node_p2p )->len + 1 == h ) )
+ {
+ if ( ( *edge_node_p2p )->edge_cov < 0x7f )
+ {
+ ( *edge_node_p2p )->edge_cov++;
+ }
+
+ break;
+ }
+
+ edge_node_p2p = & ( ( *edge_node_p2p )->nxt_edge );
+ }
+
+ if ( ( *edge_node_p2p ) == NULL )
+ {
+ ( *edge_node_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ ( *edge_cnt ) ++;
+ memset ( *edge_node_p2p, 0, sizeof ( struct edge_node ) );
+ ( *edge_node_p2p )->edge = ( uint64_t ) right_bits;
+ ( *edge_node_p2p )->edge_cov = 1;
+ ( *edge_node_p2p )->len = h - 1;
+ }
+ }
+
+ pthread_spin_unlock ( &locks[hash_idx[last_found]] ); //lock ...
+ }
+
+ last_found = cur_found;
+ }
+ }
}
@@ -528,32 +550,32 @@ Output:
Return:
None.
*************************************************/
-void SwitchBuckets ( hashtable2 * ht2, int K_size )
+void SwitchBuckets ( hashtable2 *ht2, int K_size )
{
- size_t ht_sz;
- ht_sz = ht2->ht_sz;
- bucket2_r1 * store_pos_o, *store_pos_t;
- bucket2 * store_pos_n;
- bucket2 ** bktp2p;
-
- for ( size_t i = 0; i < ht_sz; ++i )
- {
- bktp2p = & ( ht2->store_pos[i] );
- store_pos_o = ( bucket2_r1 * ) ht2->store_pos[i];
-
- while ( store_pos_o != NULL )
- {
- store_pos_n = ( bucket2 * ) malloc ( sizeof ( struct bucket2 ) );
- memset ( store_pos_n, 0, sizeof ( bucket2 ) );
- store_pos_n->kmer_t2 = store_pos_o->kmer_t2;
- store_pos_n->kmer_info.cov1 = store_pos_o->kmer_info.cov1;
- *bktp2p = store_pos_n;
- bktp2p = & ( store_pos_n->nxt_bucket );
- store_pos_t = store_pos_o;
- store_pos_o = store_pos_o->nxt_bucket;
- free ( store_pos_t );
- }
- }
+ size_t ht_sz;
+ ht_sz = ht2->ht_sz;
+ bucket2_r1 *store_pos_o, *store_pos_t;
+ bucket2 *store_pos_n;
+ bucket2 **bktp2p;
+
+ for ( size_t i = 0; i < ht_sz; ++i )
+ {
+ bktp2p = & ( ht2->store_pos[i] );
+ store_pos_o = ( bucket2_r1 * ) ht2->store_pos[i];
+
+ while ( store_pos_o != NULL )
+ {
+ store_pos_n = ( bucket2 * ) malloc ( sizeof ( struct bucket2 ) );
+ memset ( store_pos_n, 0, sizeof ( bucket2 ) );
+ store_pos_n->kmer_t2 = store_pos_o->kmer_t2;
+ store_pos_n->kmer_info.cov1 = store_pos_o->kmer_info.cov1;
+ *bktp2p = store_pos_n;
+ bktp2p = & ( store_pos_n->nxt_bucket );
+ store_pos_t = store_pos_o;
+ store_pos_o = store_pos_o->nxt_bucket;
+ free ( store_pos_t );
+ }
+ }
}
@@ -619,145 +641,151 @@ void SavingSparseKmerGraph2(hashtable2 *ht,char * outfile)
} */
-void SavingSparseKmerGraph2 ( hashtable2 * ht, char * outfile )
+void SavingSparseKmerGraph2 ( hashtable2 *ht, char *outfile )
{
- FILE * o_ht_idx, *o_ht_content, *o_cov;
- string ht_idx_name, ht_content_name, kmer_freq;
- ht_idx_name.append ( outfile ).append ( ".ht_idx" );
- ht_content_name.append ( outfile ).append ( ".ht_content" );
- kmer_freq.append ( outfile ).append ( ".kmerFreq" );
- size_t cov_hist[256];
- memset ( cov_hist, 0, 256 * sizeof ( size_t ) );
- o_ht_idx = fopen ( ht_idx_name.c_str(), "wb" );
- o_ht_content = fopen ( ht_content_name.c_str(), "wb" );
- o_cov = fopen ( kmer_freq.c_str(), "w" );
-
- if ( ! ( o_ht_idx && o_ht_content && o_cov ) )
- {
- fprintf ( stderr, "ERROR: failed saving sparse kmer graph!\n" );
- return;
- }
-
- fprintf ( o_ht_idx, "Hashtable Size: \n" );
- fprintf ( o_ht_idx, "%llu\n", ht->ht_sz );
- bucket2 * bktptr = NULL;
- struct edge_node * edge_ptr;
-
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- size_t list_sz = 0;
- bktptr = ht->store_pos[i];
-
- while ( bktptr != NULL )
- {
- if ( fwrite ( ( char * ) bktptr, sizeof ( struct bucket2 ), 1, o_ht_content ) )
- {
- edge_ptr = bktptr->kmer_info.left;
-
- while ( edge_ptr != NULL )
- {
- fwrite ( ( char * ) edge_ptr, sizeof ( struct edge_node ), 1, o_ht_content );
- edge_ptr = edge_ptr->nxt_edge;
- }
-
- edge_ptr = bktptr->kmer_info.right;
-
- while ( edge_ptr != NULL )
- {
- fwrite ( ( char * ) edge_ptr, sizeof ( struct edge_node ), 1, o_ht_content );
- edge_ptr = edge_ptr->nxt_edge;
- }
-
- int cov = bktptr->kmer_info.cov1;
-
- if ( cov >= 255 )
- {
- cov_hist[255]++;
- }
- else
- {
- cov_hist[cov]++;
- }
-
- bktptr = bktptr->nxt_bucket;
- list_sz++;
- }
- else
- {cerr << "Write error!" << endl;}
- }
-
- fprintf ( o_ht_idx, "%llu\n", list_sz );
- }
-
- for ( int i = 1; i < 256; i++ )
- {
- fprintf ( o_cov, "%d\t%llu\n", i, cov_hist[i] );
- }
-
- fclose ( o_ht_idx );
- fclose ( o_ht_content );
- fclose ( o_cov );
+ FILE *o_ht_idx, *o_ht_content, *o_cov;
+ string ht_idx_name, ht_content_name, kmer_freq;
+ ht_idx_name.append ( outfile ).append ( ".ht_idx" );
+ ht_content_name.append ( outfile ).append ( ".ht_content" );
+ kmer_freq.append ( outfile ).append ( ".kmerFreq" );
+ size_t cov_hist[256];
+ memset ( cov_hist, 0, 256 * sizeof ( size_t ) );
+ o_ht_idx = fopen ( ht_idx_name.c_str(), "wb" );
+ o_ht_content = fopen ( ht_content_name.c_str(), "wb" );
+ o_cov = fopen ( kmer_freq.c_str(), "w" );
+
+ if ( ! ( o_ht_idx && o_ht_content && o_cov ) )
+ {
+ fprintf ( stderr, "ERROR: failed saving sparse kmer graph!\n" );
+ return;
+ }
+
+ fprintf ( o_ht_idx, "Hashtable Size: \n" );
+ fprintf ( o_ht_idx, "%llu\n", ht->ht_sz );
+ bucket2 *bktptr = NULL;
+ struct edge_node *edge_ptr;
+
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ size_t list_sz = 0;
+ bktptr = ht->store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ if ( fwrite ( ( char * ) bktptr, sizeof ( struct bucket2 ), 1, o_ht_content ) )
+ {
+ edge_ptr = bktptr->kmer_info.left;
+
+ while ( edge_ptr != NULL )
+ {
+ fwrite ( ( char * ) edge_ptr, sizeof ( struct edge_node ), 1, o_ht_content );
+ edge_ptr = edge_ptr->nxt_edge;
+ }
+
+ edge_ptr = bktptr->kmer_info.right;
+
+ while ( edge_ptr != NULL )
+ {
+ fwrite ( ( char * ) edge_ptr, sizeof ( struct edge_node ), 1, o_ht_content );
+ edge_ptr = edge_ptr->nxt_edge;
+ }
+
+ int cov = bktptr->kmer_info.cov1;
+
+ if ( cov >= 255 )
+ {
+ cov_hist[255]++;
+ }
+ else
+ {
+ cov_hist[cov]++;
+ }
+
+ bktptr = bktptr->nxt_bucket;
+ list_sz++;
+ }
+ else
+ {
+ cerr << "Write error!" << endl;
+ }
+ }
+
+ fprintf ( o_ht_idx, "%llu\n", list_sz );
+ }
+
+ for ( int i = 1; i < 256; i++ )
+ {
+ fprintf ( o_cov, "%d\t%llu\n", i, cov_hist[i] );
+ }
+
+ fclose ( o_ht_idx );
+ fclose ( o_ht_content );
+ fclose ( o_cov );
}
-void LoadingSparseKmerGraph2 ( hashtable2 * ht, char * outfile )
+void LoadingSparseKmerGraph2 ( hashtable2 *ht, char *outfile )
{
- string ht_idx_name, ht_content_name;
- ht_idx_name.append ( outfile ).append ( ".ht_idx" );
- ht_content_name.append ( outfile ).append ( ".ht_content" );
- ifstream in_ht_idx ( ht_idx_name.c_str(), ios_base::in | ios_base::binary ), in_ht_content ( ht_content_name.c_str(), ios_base::in | ios_base::binary );
- size_t ht_sz;
- string s;
- getline ( in_ht_idx, s );
- getline ( in_ht_idx, s );
- ht_sz = atoi ( s.c_str() ); //cerr<<ht_sz<<endl;
- Init_HT2 ( ht, ht_sz );
- struct edge_node ** edge_p2p;
-
- for ( int i = 0; i < ht_sz; ++i )
- {
- int list_sz;
- getline ( in_ht_idx, s );
-
- if ( s[s.size() - 1] == '\r' || s[s.size() - 1] == '\n' )
- {s.resize ( s.size() - 1 );}
-
- list_sz = atoi ( s.c_str() ); //cerr<<list_sz<<endl;
- struct bucket2 ** bktp2p = & ( ht->store_pos[i] );
- *bktp2p = NULL;
-
- for ( int j = 0; j < list_sz; ++j )
- {
- *bktp2p = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2 ) );
-
- if ( in_ht_content.read ( ( char * ) ( *bktp2p ), sizeof ( struct bucket2 ) ) )
- {
- ( *bktp2p )->nxt_bucket = NULL;
- ( *bktp2p )->kmer_info.used = 0;
- edge_p2p = & ( ( *bktp2p )->kmer_info.left );
-
- while ( ( *edge_p2p ) != NULL )
- {
- ( *edge_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- in_ht_content.read ( ( char * ) ( *edge_p2p ), sizeof ( struct edge_node ) );
- edge_p2p = & ( ( *edge_p2p )->nxt_edge );
- }
-
- edge_p2p = & ( ( *bktp2p )->kmer_info.right );
-
- while ( ( *edge_p2p ) != NULL )
- {
- ( *edge_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
- in_ht_content.read ( ( char * ) ( *edge_p2p ), sizeof ( struct edge_node ) );
- edge_p2p = & ( ( *edge_p2p )->nxt_edge );
- }
-
- bktp2p = & ( ( *bktp2p )->nxt_bucket );
- }
- else
- {cerr << "Read error!" << endl;}
- }
- }
+ string ht_idx_name, ht_content_name;
+ ht_idx_name.append ( outfile ).append ( ".ht_idx" );
+ ht_content_name.append ( outfile ).append ( ".ht_content" );
+ ifstream in_ht_idx ( ht_idx_name.c_str(), ios_base::in | ios_base::binary ), in_ht_content ( ht_content_name.c_str(), ios_base::in | ios_base::binary );
+ size_t ht_sz;
+ string s;
+ getline ( in_ht_idx, s );
+ getline ( in_ht_idx, s );
+ ht_sz = atoi ( s.c_str() ); //cerr<<ht_sz<<endl;
+ Init_HT2 ( ht, ht_sz );
+ struct edge_node **edge_p2p;
+
+ for ( int i = 0; i < ht_sz; ++i )
+ {
+ int list_sz;
+ getline ( in_ht_idx, s );
+
+ if ( s[s.size() - 1] == '\r' || s[s.size() - 1] == '\n' )
+ {
+ s.resize ( s.size() - 1 );
+ }
+
+ list_sz = atoi ( s.c_str() ); //cerr<<list_sz<<endl;
+ struct bucket2 **bktp2p = & ( ht->store_pos[i] );
+ *bktp2p = NULL;
+
+ for ( int j = 0; j < list_sz; ++j )
+ {
+ *bktp2p = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2 ) );
+
+ if ( in_ht_content.read ( ( char * ) ( *bktp2p ), sizeof ( struct bucket2 ) ) )
+ {
+ ( *bktp2p )->nxt_bucket = NULL;
+ ( *bktp2p )->kmer_info.used = 0;
+ edge_p2p = & ( ( *bktp2p )->kmer_info.left );
+
+ while ( ( *edge_p2p ) != NULL )
+ {
+ ( *edge_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ in_ht_content.read ( ( char * ) ( *edge_p2p ), sizeof ( struct edge_node ) );
+ edge_p2p = & ( ( *edge_p2p )->nxt_edge );
+ }
+
+ edge_p2p = & ( ( *bktp2p )->kmer_info.right );
+
+ while ( ( *edge_p2p ) != NULL )
+ {
+ ( *edge_p2p ) = ( struct edge_node * ) malloc ( sizeof ( struct edge_node ) );
+ in_ht_content.read ( ( char * ) ( *edge_p2p ), sizeof ( struct edge_node ) );
+ edge_p2p = & ( ( *edge_p2p )->nxt_edge );
+ }
+
+ bktp2p = & ( ( *bktp2p )->nxt_bucket );
+ }
+ else
+ {
+ cerr << "Read error!" << endl;
+ }
+ }
+ }
}
diff --git a/sparsePregraph/build_preArc.cpp b/sparsePregraph/build_preArc.cpp
index ad4f2ca..42096f8 100644
--- a/sparsePregraph/build_preArc.cpp
+++ b/sparsePregraph/build_preArc.cpp
@@ -1,7 +1,7 @@
/*
* build_preArc.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -65,10 +65,10 @@
-void init_vertex_hash ( vertex_hash2 * v_ht, size_t sz )
+void init_vertex_hash ( vertex_hash2 *v_ht, size_t sz )
{
- v_ht->ht_sz = sz;
- v_ht->store_pos = ( vertex2 ** ) calloc ( sz, sizeof ( vertex2 * ) );
+ v_ht->ht_sz = sz;
+ v_ht->store_pos = ( vertex2 ** ) calloc ( sz, sizeof ( vertex2 * ) );
}
@@ -87,168 +87,171 @@ Output:
Return:
None.
*************************************************/
-void build_vertexes ( vertex_hash2 * v_ht, int K_size, char * edge_file )
+void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file )
{
- FILE * fp;
- kmer_t2 from_kmer, to_kmer;
- size_t line_len, edge_len_left;
- int edge_len;
- int cvg;
- bool bal_ed;//回文为0
- const int BUFF_LEN = 1024;
- char line[BUFF_LEN];
- char str[32];
- char to_buff[BUFF_LEN];//buffer 2k edge seq BUFF_LEN>4*K_size
- int processed = 0; //0表示未处理 1表示 处理完成 2 表示已经处理了from_vertex ,to_vertex 还没有处理
- size_t edge_id = 0;
- fp = fopen ( edge_file, "r" );
-
- if ( !fp )
- {
- fprintf ( stderr, "ERROR: Cannot open edge_file %s. Now exit to system...\n", edge_file );
- exit ( -1 );
- }
-
- vertex2 * v_tmp;
- edge_starter2 * e_tmp;
- int is_found;
- bool is_left;
-
- while ( fgets ( line, BUFF_LEN, fp ) != NULL )
- {
- //debug<<"processed "<<processed<<endl;
- if ( line[0] == '>' ) //get one edge length, from vertex, to vertex,cvg,bal
- {
- if ( processed == 1 && bal_ed )
- {
- edge_id++;//������ǻ���
- }
+ FILE *fp;
+ kmer_t2 from_kmer, to_kmer;
+ size_t line_len, edge_len_left;
+ int edge_len;
+ int cvg;
+ bool bal_ed;//回文为0
+ const int BUFF_LEN = 1024;
+ char line[BUFF_LEN];
+ char str[32];
+ char to_buff[BUFF_LEN];//buffer 2k edge seq BUFF_LEN>4*K_size
+ int processed = 0; //0表示未处理 1表示 处理完成 2 表示已经处理了from_vertex ,to_vertex 还没有处理
+ size_t edge_id = 0;
+ fp = fopen ( edge_file, "r" );
+
+ if ( !fp )
+ {
+ fprintf ( stderr, "ERROR: Cannot open edge_file %s. Now exit to system...\n", edge_file );
+ exit ( -1 );
+ }
+
+ vertex2 *v_tmp;
+ edge_starter2 *e_tmp;
+ int is_found;
+ bool is_left;
+
+ while ( fgets ( line, BUFF_LEN, fp ) != NULL )
+ {
+ //debug<<"processed "<<processed<<endl;
+ if ( line[0] == '>' ) //get one edge length, from vertex, to vertex,cvg,bal
+ {
+ if ( processed == 1 && bal_ed )
+ {
+ edge_id++;//如果不是回文
+ }
#ifdef _63MER_
- sscanf ( line + 7, "%d,%llx %llx ,%llx %llx ,%s %d,%d", &edge_len,
- & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ sscanf ( line + 7, "%d,%llx %llx ,%llx %llx ,%s %d,%d", &edge_len,
+ & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
#ifdef _127MER_
- sscanf ( line + 7, "%d,%llx %llx %llx %llx ,%llx %llx %llx %llx ,%s %d,%d", &edge_len,
- & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
- & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ sscanf ( line + 7, "%d,%llx %llx %llx %llx ,%llx %llx %llx %llx ,%s %d,%d", &edge_len,
+ & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
+ & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
- edge_len_left = K_size + edge_len;
- processed = 0;
- edge_id++;// current edge positive strand id
- //debug<<line<<"edge_id "<<edge_id<<endl;
- }
- else
- {
- if ( processed == 0 )
- {
- line_len = strlen ( line );
-
- if ( line[line_len - 1] == '\n' )
- {
- line[line_len - 1] = '\0';
- line_len --;
- }
-
- if ( edge_len_left - line_len == 0 ) //edge completely loaded
- {
- //do all process
- process_edge ( v_ht, K_size, line, line_len, 1, edge_id, bal_ed );
- processed = 1;
- edge_len_left = 0;
- continue;
- }
- else //edge partly loaded at the first time.
- {
- if ( line_len < 2 * K_size ) //line_len < 2*K_size &&edge_len_left - line_len > 0
- {
- fprintf ( stderr, "ERROR:it won't happen in 63mer/127mer\n" );
- exit ( 1 );
- }
- else
- {
- process_edge ( v_ht, K_size, line, line_len, 2, edge_id, bal_ed );
- processed = 2;
- edge_len_left -= line_len;
-
- if ( edge_len_left >= 2 * K_size )
- {
- //no need to buf the to kmer seq
- }
- else if ( edge_len_left < 2 * K_size )
- {
- //to_buff[100];/ copy the last 2K char of line to to_buff already no '\n'
- strcpy ( to_buff, line + ( line_len - 2 * K_size ) );
- }
- else
- {
- fprintf ( stderr, "ERROR: in cal the edge_len_left!!\n" );
- exit ( 1 );
- }
- }
- }
- }
- else if ( processed == 2 )
- {
- //if(line[0]=='\n') continue;
- line_len = strlen ( line );
-
- if ( line[line_len - 1] == '\n' )
- {
- line[line_len - 1] = '\0';
- line_len --;
- }
-
- edge_len_left -= line_len;
-
- if ( edge_len_left == 0 ) //load the complete edge sequence
- {
- //process the to kmer
- if ( line_len >= 2 * K_size )
- {
- process_edge ( v_ht, K_size, line, line_len, 3, edge_id, bal_ed );
- }
- else
- {
- //need to use the to_buff
- int buf_len = strlen ( to_buff );
- strcpy ( to_buff + buf_len, line );
- buf_len = strlen ( to_buff );
- process_edge ( v_ht, K_size, to_buff, buf_len, 3, edge_id, bal_ed );
- }
-
- processed = 1;
- continue;
- }
- else
- {
- if ( edge_len_left >= 2 * K_size )
- {
- //no need to buf the to kmer seq
- }
- else if ( edge_len_left < 2 * K_size )
- {
- //to_buff[100];/ copy the last 2K char of line to to_buff
- strcpy ( to_buff, line + ( line_len - 2 * K_size ) );
- }
- else
- {
- fprintf ( stderr, "ERROR: in cal the edge_len_left!!\n" );
- exit ( 1 );
- }
- }
- }
- else
- {
- if ( line[0] == '\n' ) { continue; } //当len = 1023时
-
- fprintf ( stderr, "ERROR: in cal the status_processed !! %d \n", processed );
- exit ( 1 );
- }
- }
- }
-
- fclose ( fp );
+ edge_len_left = K_size + edge_len;
+ processed = 0;
+ edge_id++;// current edge positive strand id
+ //debug<<line<<"edge_id "<<edge_id<<endl;
+ }
+ else
+ {
+ if ( processed == 0 )
+ {
+ line_len = strlen ( line );
+
+ if ( line[line_len - 1] == '\n' )
+ {
+ line[line_len - 1] = '\0';
+ line_len --;
+ }
+
+ if ( edge_len_left - line_len == 0 ) //edge completely loaded
+ {
+ //do all process
+ process_edge ( v_ht, K_size, line, line_len, 1, edge_id, bal_ed );
+ processed = 1;
+ edge_len_left = 0;
+ continue;
+ }
+ else //edge partly loaded at the first time.
+ {
+ if ( line_len < 2 * K_size ) //line_len < 2*K_size &&edge_len_left - line_len > 0
+ {
+ fprintf ( stderr, "ERROR:it won't happen in 63mer/127mer\n" );
+ exit ( 1 );
+ }
+ else
+ {
+ process_edge ( v_ht, K_size, line, line_len, 2, edge_id, bal_ed );
+ processed = 2;
+ edge_len_left -= line_len;
+
+ if ( edge_len_left >= 2 * K_size )
+ {
+ //no need to buf the to kmer seq
+ }
+ else if ( edge_len_left < 2 * K_size )
+ {
+ //to_buff[100];/ copy the last 2K char of line to to_buff already no '\n'
+ strcpy ( to_buff, line + ( line_len - 2 * K_size ) );
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: in cal the edge_len_left!!\n" );
+ exit ( 1 );
+ }
+ }
+ }
+ }
+ else if ( processed == 2 )
+ {
+ //if(line[0]=='\n') continue;
+ line_len = strlen ( line );
+
+ if ( line[line_len - 1] == '\n' )
+ {
+ line[line_len - 1] = '\0';
+ line_len --;
+ }
+
+ edge_len_left -= line_len;
+
+ if ( edge_len_left == 0 ) //load the complete edge sequence
+ {
+ //process the to kmer
+ if ( line_len >= 2 * K_size )
+ {
+ process_edge ( v_ht, K_size, line, line_len, 3, edge_id, bal_ed );
+ }
+ else
+ {
+ //need to use the to_buff
+ int buf_len = strlen ( to_buff );
+ strcpy ( to_buff + buf_len, line );
+ buf_len = strlen ( to_buff );
+ process_edge ( v_ht, K_size, to_buff, buf_len, 3, edge_id, bal_ed );
+ }
+
+ processed = 1;
+ continue;
+ }
+ else
+ {
+ if ( edge_len_left >= 2 * K_size )
+ {
+ //no need to buf the to kmer seq
+ }
+ else if ( edge_len_left < 2 * K_size )
+ {
+ //to_buff[100];/ copy the last 2K char of line to to_buff
+ strcpy ( to_buff, line + ( line_len - 2 * K_size ) );
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: in cal the edge_len_left!!\n" );
+ exit ( 1 );
+ }
+ }
+ }
+ else
+ {
+ if ( line[0] == '\n' )
+ {
+ continue; //当len = 1023时
+ }
+
+ fprintf ( stderr, "ERROR: in cal the status_processed !! %d \n", processed );
+ exit ( 1 );
+ }
+ }
+ }
+
+ fclose ( fp );
}
@@ -270,259 +273,265 @@ Output:
Return:
None.
*************************************************/
-static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge )
+static void process_edge ( vertex_hash2 *v_ht, int K_size, char *seq, int len, int type, size_t edge_id, bool bal_edge )
{
- kmer_t2 vertex_kmer;
- kmer_t2 edge_kmer;
- vertex2 * v_tmp;
- edge_starter2 * e_tmp;
- int is_found;
- bool is_left;
- int edge_kmer_len;
-
- switch ( type )
- {
- case 1: //process all ..
- //process the head
- get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );
-
- if ( len <= K_size + gap ) //get the last kmer
- {
- get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
- edge_kmer_len = len - K_size;
- }
- else
- {
- //get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer);
- get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
- edge_kmer_len = gap;
- }
-
- is_left = 0;//right
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
- reverseCompKmer ( &vertex_kmer, K_size );
- reverseCompKmer ( &edge_kmer, K_size );
- is_left = 1;//left
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
- //process the tail
- get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );
-
- if ( len <= K_size + gap ) //get the first kmer
- {
- get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
- edge_kmer_len = len - K_size;
- }
- else
- {
- get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
- edge_kmer_len = gap;
- }
-
- is_left = 1;
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
- reverseCompKmer ( &vertex_kmer, K_size );
- reverseCompKmer ( &edge_kmer, K_size );
- is_left = 0;//right
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
- break;
- case 2:
- //process only the head
- get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );
-
- if ( len <= K_size + gap )
- {
- get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
- edge_kmer_len = len - K_size;
- }
- else
- {
- get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
- edge_kmer_len = gap;
- }
-
- is_left = 0;//right
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
- reverseCompKmer ( &vertex_kmer, K_size );
- reverseCompKmer ( &edge_kmer, K_size );
- is_left = 1;//left
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
- break;
- case 3:
- //process only the tail
- get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );
-
- if ( len <= K_size + gap )
- {
- get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
- edge_kmer_len = len - K_size;
- }
- else
- {
- get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
- edge_kmer_len = gap;
- }
-
- is_left = 1;
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
- reverseCompKmer ( &vertex_kmer, K_size );
- reverseCompKmer ( &edge_kmer, K_size );
- is_left = 0;//right
- v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
- put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
- break;
- default:
- fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" );
- exit ( 1 );
- }
+ kmer_t2 vertex_kmer;
+ kmer_t2 edge_kmer;
+ vertex2 *v_tmp;
+ edge_starter2 *e_tmp;
+ int is_found;
+ bool is_left;
+ int edge_kmer_len;
+
+ switch ( type )
+ {
+ case 1: //process all ..
+ //process the head
+ get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );
+
+ if ( len <= K_size + gap ) //get the last kmer
+ {
+ get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
+ edge_kmer_len = len - K_size;
+ }
+ else
+ {
+ //get_kmer_from_seq(seq, len, K_size, K_size,&edge_kmer);
+ get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
+ edge_kmer_len = gap;
+ }
+
+ is_left = 0;//right
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
+ reverseCompKmer ( &vertex_kmer, K_size );
+ reverseCompKmer ( &edge_kmer, K_size );
+ is_left = 1;//left
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
+ //process the tail
+ get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );
+
+ if ( len <= K_size + gap ) //get the first kmer
+ {
+ get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
+ edge_kmer_len = len - K_size;
+ }
+ else
+ {
+ get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
+ edge_kmer_len = gap;
+ }
+
+ is_left = 1;
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
+ reverseCompKmer ( &vertex_kmer, K_size );
+ reverseCompKmer ( &edge_kmer, K_size );
+ is_left = 0;//right
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
+ break;
+
+ case 2:
+ //process only the head
+ get_kmer_from_seq ( seq, len, K_size, 0, &vertex_kmer );
+
+ if ( len <= K_size + gap )
+ {
+ get_kmer_from_seq ( seq, len, K_size, len - K_size, &edge_kmer );
+ edge_kmer_len = len - K_size;
+ }
+ else
+ {
+ get_kmer_from_seq ( seq, len, K_size, gap, &edge_kmer );
+ edge_kmer_len = gap;
+ }
+
+ is_left = 0;//right
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
+ reverseCompKmer ( &vertex_kmer, K_size );
+ reverseCompKmer ( &edge_kmer, K_size );
+ is_left = 1;//left
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
+ break;
+
+ case 3:
+ //process only the tail
+ get_kmer_from_seq ( seq, len, K_size, len - K_size, &vertex_kmer );
+
+ if ( len <= K_size + gap )
+ {
+ get_kmer_from_seq ( seq, len, K_size, 0, &edge_kmer );
+ edge_kmer_len = len - K_size;
+ }
+ else
+ {
+ get_kmer_from_seq ( seq, len, K_size, len - K_size - gap, &edge_kmer );
+ edge_kmer_len = gap;
+ }
+
+ is_left = 1;
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id );
+ reverseCompKmer ( &vertex_kmer, K_size );
+ reverseCompKmer ( &edge_kmer, K_size );
+ is_left = 0;//right
+ v_tmp = put_vertex ( v_ht, vertex_kmer, is_found );
+ put_edge ( v_tmp, edge_kmer, is_left, edge_kmer_len, edge_id + bal_edge );
+ break;
+
+ default:
+ fprintf ( stderr, "ERROR: wrong process type in process_edge()\n" );
+ exit ( 1 );
+ }
}
-static vertex2 * put_vertex ( vertex_hash2 * v_ht, kmer_t2 vertex_kmer, int & is_found ) //63 127 differ fixed
+static vertex2 *put_vertex ( vertex_hash2 *v_ht, kmer_t2 vertex_kmer, int &is_found ) //63 127 differ fixed
{
- uint64_t hv = MurmurHash64A ( vertex_kmer.kmer, sizeof ( kmer_t2 ), 0 ); //hash value
- uint64_t idx = ( size_t ) ( hv % v_ht->ht_sz );
- vertex2 * ver = ( v_ht->store_pos ) [idx];
-
- if ( !ver )
- {
- ( v_ht->store_pos ) [idx] = ( vertex2 * ) malloc ( sizeof ( vertex2 ) );
- ver = ( v_ht->store_pos ) [idx];
- ver->kmer_t2 = vertex_kmer;
- ver->left = NULL;
- ver->right = NULL;
- ver->next = NULL;
- is_found = 0;
- return ver;
- }
-
- while ( ver )
- {
- if ( kmerCompare ( & ( ver->kmer_t2 ), &vertex_kmer ) == 0 )
- {
- is_found = 1;
- return ver;
- }
-
- if ( ver->next == NULL ) { break; }
-
- ver = ver->next;
- }
-
- is_found = 0;
- ver->next = ( vertex2 * ) malloc ( sizeof ( vertex2 ) );
- ver->next->kmer_t2 = vertex_kmer;
- ver->next->left = NULL;
- ver->next->right = NULL;
- ver->next->next = NULL;
- return ver->next;
+ uint64_t hv = MurmurHash64A ( vertex_kmer.kmer, sizeof ( kmer_t2 ), 0 ); //hash value
+ uint64_t idx = ( size_t ) ( hv % v_ht->ht_sz );
+ vertex2 *ver = ( v_ht->store_pos ) [idx];
+
+ if ( !ver )
+ {
+ ( v_ht->store_pos ) [idx] = ( vertex2 * ) malloc ( sizeof ( vertex2 ) );
+ ver = ( v_ht->store_pos ) [idx];
+ ver->kmer_t2 = vertex_kmer;
+ ver->left = NULL;
+ ver->right = NULL;
+ ver->next = NULL;
+ is_found = 0;
+ return ver;
+ }
+
+ while ( ver )
+ {
+ if ( kmerCompare ( & ( ver->kmer_t2 ), &vertex_kmer ) == 0 )
+ {
+ is_found = 1;
+ return ver;
+ }
+
+ if ( ver->next == NULL )
+ {
+ break;
+ }
+
+ ver = ver->next;
+ }
+
+ is_found = 0;
+ ver->next = ( vertex2 * ) malloc ( sizeof ( vertex2 ) );
+ ver->next->kmer_t2 = vertex_kmer;
+ ver->next->left = NULL;
+ ver->next->right = NULL;
+ ver->next->next = NULL;
+ return ver->next;
}
-static void put_edge ( vertex2 * ver, kmer_t2 edge_kmer, bool is_left, int len, size_t edge_id ) //fixed
+static void put_edge ( vertex2 *ver, kmer_t2 edge_kmer, bool is_left, int len, size_t edge_id ) //fixed
{
- edge_starter2 * tmp = NULL;
-
- if ( is_left )
- {
- if ( !ver->left )
- {
- ver->left = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
- ver->left->edge_kmer = edge_kmer;
- ver->left->edge_id = edge_id;
- ver->left->len = len;//record the length of edge (1~k)
- ver->left->next = NULL;
- return;
- }
-
- tmp = ver->left;
- }
- else
- {
- if ( !ver->right )
- {
- ver->right = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
- ver->right->edge_kmer = edge_kmer;
- ver->right->edge_id = edge_id;
- ver->right->len = len;//record the length of edge (1~k)
- ver->right->next = NULL;
- return;
- }
-
- tmp = ver->right;
- }
-
- while ( tmp->next ) //because there are no two edges equal attached with one node ...
- {
- tmp = tmp->next;
- }
-
- tmp->next = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
- tmp->next->edge_kmer = edge_kmer;
- tmp->next->edge_id = edge_id;
- tmp->next->len = len;//record the length of edge (1~k)
- tmp->next->next = NULL;
+ edge_starter2 *tmp = NULL;
+
+ if ( is_left )
+ {
+ if ( !ver->left )
+ {
+ ver->left = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
+ ver->left->edge_kmer = edge_kmer;
+ ver->left->edge_id = edge_id;
+ ver->left->len = len;//record the length of edge (1~k)
+ ver->left->next = NULL;
+ return;
+ }
+
+ tmp = ver->left;
+ }
+ else
+ {
+ if ( !ver->right )
+ {
+ ver->right = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
+ ver->right->edge_kmer = edge_kmer;
+ ver->right->edge_id = edge_id;
+ ver->right->len = len;//record the length of edge (1~k)
+ ver->right->next = NULL;
+ return;
+ }
+
+ tmp = ver->right;
+ }
+
+ while ( tmp->next ) //because there are no two edges equal attached with one node ...
+ {
+ tmp = tmp->next;
+ }
+
+ tmp->next = ( edge_starter2 * ) malloc ( sizeof ( edge_starter2 ) );
+ tmp->next->edge_kmer = edge_kmer;
+ tmp->next->edge_id = edge_id;
+ tmp->next->len = len;//record the length of edge (1~k)
+ tmp->next->next = NULL;
}
-static vertex2 * search_vertex ( vertex_hash2 * v_ht, kmer_t2 * vertex_kmer ) //fixed ...
+static vertex2 *search_vertex ( vertex_hash2 *v_ht, kmer_t2 *vertex_kmer ) //fixed ...
{
- uint64_t hv = MurmurHash64A ( vertex_kmer->kmer, sizeof ( kmer_t2 ), 0 ); //hash value
- uint64_t idx = ( size_t ) ( hv % v_ht->ht_sz );
- vertex2 * ver = ( v_ht->store_pos ) [idx];
+ uint64_t hv = MurmurHash64A ( vertex_kmer->kmer, sizeof ( kmer_t2 ), 0 ); //hash value
+ uint64_t idx = ( size_t ) ( hv % v_ht->ht_sz );
+ vertex2 *ver = ( v_ht->store_pos ) [idx];
- while ( ver )
- {
- if ( kmerCompare ( & ( ver->kmer_t2 ), vertex_kmer ) == 0 )
- {
- return ver;
- }
+ while ( ver )
+ {
+ if ( kmerCompare ( & ( ver->kmer_t2 ), vertex_kmer ) == 0 )
+ {
+ return ver;
+ }
- ver = ver->next;
- }
+ ver = ver->next;
+ }
- return NULL;
+ return NULL;
}
-void init_preArc_array ( preArc_array * arc_array, size_t sz ) //63 127 same
+void init_preArc_array ( preArc_array *arc_array, size_t sz ) //63 127 same
{
- arc_array->array_sz = sz;
- arc_array->store_pos = ( preArc ** ) calloc ( sz, sizeof ( preArc * ) );
+ arc_array->array_sz = sz;
+ arc_array->store_pos = ( preArc ** ) calloc ( sz, sizeof ( preArc * ) );
}
-static void chop_kmers ( const char * read, int len, int K_size, kmer_t2 * kmer_array, int kmer_array_len, int & kmer_num )
+static void chop_kmers ( const char *read, int len, int K_size, kmer_t2 *kmer_array, int kmer_array_len, int &kmer_num )
{
- if ( len <= K_size )
- {
- kmer_num = 0;
- return ;
- }
-
- kmer_num = len - K_size + 1;
-
- if ( kmer_num > kmer_array_len )
- {
- fprintf ( stderr, "ERROR: the kmer_array_len is not enough! %d\n", kmer_num );
- exit ( 1 );
- }
-
- kmer_t2 kmer;
-
- for ( int i = 0; i < kmer_num; ++i ) //optimize later
- {
- get_kmer_from_seq ( read, len, K_size, i, &kmer );
- kmer_array[i] = kmer;
- }
+ if ( len <= K_size )
+ {
+ kmer_num = 0;
+ return ;
+ }
+
+ kmer_num = len - K_size + 1;
+
+ if ( kmer_num > kmer_array_len )
+ {
+ fprintf ( stderr, "ERROR: the kmer_array_len is not enough! %d\n", kmer_num );
+ exit ( 1 );
+ }
+
+ kmer_t2 kmer;
+
+ for ( int i = 0; i < kmer_num; ++i ) //optimize later
+ {
+ get_kmer_from_seq ( read, len, K_size, i, &kmer );
+ kmer_array[i] = kmer;
+ }
}
/*************************************************
@@ -541,166 +550,172 @@ Output:
Return:
None.
*************************************************/
-static inline void put_preArc_threaded ( preArc_array * arc_arr, pthread_spinlock_t * locks, size_t left_id, size_t right_id, int added_multi )
+static inline void put_preArc_threaded ( preArc_array *arc_arr, pthread_spinlock_t *locks, size_t left_id, size_t right_id, int added_multi )
{
- pthread_spin_lock ( &locks[left_id] );
- put_preArc ( arc_arr, left_id, right_id, added_multi );
- pthread_spin_unlock ( &locks[left_id] );
+ pthread_spin_lock ( &locks[left_id] );
+ put_preArc ( arc_arr, left_id, right_id, added_multi );
+ pthread_spin_unlock ( &locks[left_id] );
}
-static inline void put_preArc ( preArc_array * arc_arr, size_t left_id, size_t right_id, int added_multi )
+static inline void put_preArc ( preArc_array *arc_arr, size_t left_id, size_t right_id, int added_multi )
{
- preArc * arc = ( arc_arr->store_pos ) [left_id];
-
- if ( !arc )
- {
- ( arc_arr->store_pos ) [left_id] = ( preArc * ) malloc ( sizeof ( preArc ) );
- arc = ( arc_arr->store_pos ) [left_id];
- arc->to_ed = right_id;
- arc->multiplicity = added_multi;
- arc->next = NULL;
- return;
- }
-
- while ( arc )
- {
- if ( arc->to_ed == right_id )
- {
- arc->multiplicity += added_multi;
- return;
- }
-
- if ( arc->next == NULL ) { break; }
-
- arc = arc->next;
- }
-
- arc->next = ( preArc * ) malloc ( sizeof ( preArc ) );
- arc->next->to_ed = right_id;
- arc->next->multiplicity = added_multi;
- arc->next->next = NULL;
+ preArc *arc = ( arc_arr->store_pos ) [left_id];
+
+ if ( !arc )
+ {
+ ( arc_arr->store_pos ) [left_id] = ( preArc * ) malloc ( sizeof ( preArc ) );
+ arc = ( arc_arr->store_pos ) [left_id];
+ arc->to_ed = right_id;
+ arc->multiplicity = added_multi;
+ arc->next = NULL;
+ return;
+ }
+
+ while ( arc )
+ {
+ if ( arc->to_ed == right_id )
+ {
+ arc->multiplicity += added_multi;
+ return;
+ }
+
+ if ( arc->next == NULL )
+ {
+ break;
+ }
+
+ arc = arc->next;
+ }
+
+ arc->next = ( preArc * ) malloc ( sizeof ( preArc ) );
+ arc->next->to_ed = right_id;
+ arc->next->multiplicity = added_multi;
+ arc->next->next = NULL;
}
-static inline void put_preArc ( preArc_array * arc_arr, size_t left_id, size_t right_id )
+static inline void put_preArc ( preArc_array *arc_arr, size_t left_id, size_t right_id )
{
- preArc * arc = ( arc_arr->store_pos ) [left_id];
-
- if ( !arc )
- {
- ( arc_arr->store_pos ) [left_id] = ( preArc * ) malloc ( sizeof ( preArc ) );
- arc = ( arc_arr->store_pos ) [left_id];
- arc->to_ed = right_id;
- arc->multiplicity = 1;
- arc->next = NULL;
- return;
- }
-
- while ( arc )
- {
- if ( arc->to_ed == right_id )
- {
- arc->multiplicity++;
- return;
- }
-
- if ( arc->next == NULL ) { break; }
-
- arc = arc->next;
- }
-
- arc->next = ( preArc * ) malloc ( sizeof ( preArc ) );
- arc->next->to_ed = right_id;
- arc->next->multiplicity = 1;
- arc->next->next = NULL;
+ preArc *arc = ( arc_arr->store_pos ) [left_id];
+
+ if ( !arc )
+ {
+ ( arc_arr->store_pos ) [left_id] = ( preArc * ) malloc ( sizeof ( preArc ) );
+ arc = ( arc_arr->store_pos ) [left_id];
+ arc->to_ed = right_id;
+ arc->multiplicity = 1;
+ arc->next = NULL;
+ return;
+ }
+
+ while ( arc )
+ {
+ if ( arc->to_ed == right_id )
+ {
+ arc->multiplicity++;
+ return;
+ }
+
+ if ( arc->next == NULL )
+ {
+ break;
+ }
+
+ arc = arc->next;
+ }
+
+ arc->next = ( preArc * ) malloc ( sizeof ( preArc ) );
+ arc->next->to_ed = right_id;
+ arc->next->multiplicity = 1;
+ arc->next->next = NULL;
}
-void output_preArcs ( preArc_array * arc_arr, char * outfile )
+void output_preArcs ( preArc_array *arc_arr, char *outfile )
{
- FILE * fp;
- fp = fopen ( outfile, "w" );
-
- if ( !fp )
- {
- fprintf ( stderr, "ERROR: can't create file %s in output_preArc\n", outfile );
- exit ( 1 );
- }
-
- preArc * parc;
-
- for ( size_t i = 0; i < arc_arr->array_sz; ++i )
- {
- parc = ( arc_arr->store_pos ) [i];
-
- if ( parc )
- {
- fprintf ( fp, "%u", i );
- int j = 0;
-
- while ( parc )
- {
- j++;
- fprintf ( fp, " %u %u", parc->to_ed, parc->multiplicity );
- parc = parc->next;
-
- if ( parc && j % 4 == 0 )
- {
- fprintf ( fp, "\n" );
- fprintf ( fp, "%u", i );
- }
- }
-
- fprintf ( fp, "\n" );
- }
- }
-
- fclose ( fp );
+ FILE *fp;
+ fp = fopen ( outfile, "w" );
+
+ if ( !fp )
+ {
+ fprintf ( stderr, "ERROR: can't create file %s in output_preArc\n", outfile );
+ exit ( 1 );
+ }
+
+ preArc *parc;
+
+ for ( size_t i = 0; i < arc_arr->array_sz; ++i )
+ {
+ parc = ( arc_arr->store_pos ) [i];
+
+ if ( parc )
+ {
+ fprintf ( fp, "%u", i );
+ int j = 0;
+
+ while ( parc )
+ {
+ j++;
+ fprintf ( fp, " %u %u", parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+
+ if ( parc && j % 4 == 0 )
+ {
+ fprintf ( fp, "\n" );
+ fprintf ( fp, "%u", i );
+ }
+ }
+
+ fprintf ( fp, "\n" );
+ }
+ }
+
+ fclose ( fp );
}
-static void free_vertex ( vertex2 * tmp )
+static void free_vertex ( vertex2 *tmp )
{
- edge_starter2 * edge_s, *edge_s2;
- edge_s = tmp->left;
-
- while ( edge_s )
- {
- edge_s2 = edge_s;
- edge_s = edge_s->next;
- free ( edge_s2 );
- }
-
- edge_s = tmp->right;
-
- while ( edge_s )
- {
- edge_s2 = edge_s;
- edge_s = edge_s->next;
- free ( edge_s2 );
- }
-
- free ( tmp );
+ edge_starter2 *edge_s, *edge_s2;
+ edge_s = tmp->left;
+
+ while ( edge_s )
+ {
+ edge_s2 = edge_s;
+ edge_s = edge_s->next;
+ free ( edge_s2 );
+ }
+
+ edge_s = tmp->right;
+
+ while ( edge_s )
+ {
+ edge_s2 = edge_s;
+ edge_s = edge_s->next;
+ free ( edge_s2 );
+ }
+
+ free ( tmp );
}
-void free_vertex_hash ( vertex_hash2 * v_ht )
+void free_vertex_hash ( vertex_hash2 *v_ht )
{
- vertex2 * tmp, *tmp2;
+ vertex2 *tmp, *tmp2;
- for ( size_t i = 0; i < v_ht->ht_sz; ++i )
- {
- tmp = ( v_ht->store_pos ) [i];
+ for ( size_t i = 0; i < v_ht->ht_sz; ++i )
+ {
+ tmp = ( v_ht->store_pos ) [i];
- while ( tmp )
- {
- tmp2 = tmp;
- tmp = tmp->next;
- free_vertex ( tmp2 );
- }
- }
+ while ( tmp )
+ {
+ tmp2 = tmp;
+ tmp = tmp->next;
+ free_vertex ( tmp2 );
+ }
+ }
- free ( v_ht->store_pos );
+ free ( v_ht->store_pos );
}
/*************************************************
@@ -727,270 +742,283 @@ Output:
Return:
None.
*************************************************/
-void process_1read_preArc ( preArc_array * arc_arr, pthread_spinlock_t * locks, int thread_id, vertex_hash2 * v_ht, int K_size, int cut_off_len, const char * read )
+void process_1read_preArc ( preArc_array *arc_arr, pthread_spinlock_t *locks, int thread_id, vertex_hash2 *v_ht, int K_size, int cut_off_len, const char *read )
{
- const int BUFF_LEN = 1024;
- kmer_t2 kmers[BUFF_LEN];
- int kmer_array_len = cut_off_len - K_size + 1;
- int kmer_num ;
- vertex2 * v_tmp;
- edge_starter2 * e_tmp;
- size_t left_id;
- size_t right_id;
- int left_found = 0, right_found = 0;
- int edge_len;
- //update
- //int map_len;
- //int shortest_maplen = 0;
- //add for -R solving tiny repeats
- unsigned int path[128];
- unsigned int counter = 0;
- //int read_len,i=0;
- int read_len = strlen ( read );
- /*
- while(read[i]!='\0'){
- i++;
- }
- read_len = i;
- //read_len = strlen(read);
- if(read[read_len-1]=='\n'){
- read[read_len-1]='\0';
- read_len--;
- }*/
-
- if ( read_len > cut_off_len ) { read_len = cut_off_len; }
-
- kmer_array_len = read_len - K_size + 1;
- chop_kmers ( read, read_len, K_size, kmers, kmer_array_len, kmer_num );
-
- for ( int i = 1; i < kmer_num - 1; ++i ) //search every kmer exclude the begin and end kmer
- {
- v_tmp = search_vertex ( v_ht, &kmers[i] );
-
- if ( v_tmp ) //found
- {
- //search left edge kmer got left id
- e_tmp = v_tmp->left;
-
- while ( e_tmp )
- {
- edge_len = e_tmp->len;
-
- if ( edge_len <= i )
- {
- if ( kmerCompare ( & ( kmers[i - edge_len] ), & ( e_tmp->edge_kmer ) ) == 0 )
- {
- left_id = e_tmp->edge_id;
-
- if ( left_found )
- {
- fprintf ( stderr, "ERROR: left edge id found already !new found id %llu \n", left_id );
- fprintf ( stderr, "i:%d ,edge_len:%d\n", i, edge_len );
- printKmerSeq ( & ( kmers[i - edge_len] ), K_size, stderr );
- printKmerSeq ( & ( e_tmp->edge_kmer ), K_size, stderr );
- exit ( 1 );
- };
-
- left_found = 1;
-
- break;
- }
- }
- else
- {
- kmer_t2 read_edge = kmers[0];
-
- if ( K_size > i )
- {
- kmerMoveRight ( &read_edge, K_size - i );
- }
-
- kmer_t2 KMER_FILTER;
- initKmerFilter ( i, &KMER_FILTER );
- kmer_t2 edge_kmer = e_tmp->edge_kmer;
-
- if ( K_size > edge_len )
- {
- kmerMoveRight ( &edge_kmer, K_size - edge_len );
- }
-
- kmerAnd ( &read_edge, &KMER_FILTER );
- kmerAnd ( &edge_kmer, &KMER_FILTER );
-
- if ( kmerCompare ( &read_edge, &edge_kmer ) == 0 )
- {
- left_found++;
- left_id = e_tmp->edge_id;
-
- if ( left_found == 2 )
- {
- //debug_build<<"can't distinct which left edge\n";
- break;
- }
- }
- }
-
- e_tmp = e_tmp->next;
- }
-
- //update maplen_control
- /*
- if(edge_len >= shortest_maplen){
- if(map_len < shortest_maplen) left_found = 0;
- }else{
- if(map_len != edge_len) left_found = 0;
- }*/
-
- if ( left_found != 1 ) {left_found = 0; right_found = 0; continue;} //not found or multi found
-
- //todo : aln if left_found = 0 ... find the best
- //search right edge kmer got right id
- e_tmp = v_tmp->right;
-
- while ( e_tmp )
- {
- edge_len = e_tmp->len;
-
- if ( edge_len <= kmer_num - 1 - i )
- {
- if ( kmerCompare ( & ( kmers[i + edge_len] ), & ( e_tmp->edge_kmer ) ) == 0 )
- {
- right_id = e_tmp->edge_id;
-
- if ( right_found )
- {
- fprintf ( stderr, "ERROR: right edge id found already, new found id %llu !\n", right_id );
- fprintf ( stderr, "i:%d ,edge_len:%d\n", i, edge_len );
- printKmerSeq ( & ( kmers[i + edge_len] ), K_size, stderr );
- printKmerSeq ( & ( e_tmp->edge_kmer ), K_size, stderr );
- exit ( 1 );
- };
-
- right_found = 1;
-
- break;
- }
- }
- else
- {
- int read_edge_len = ( kmer_num - 1 - i );
- kmer_t2 KMER_FILTER;
- initKmerFilter ( read_edge_len, &KMER_FILTER );
- kmer_t2 read_edge = kmers[kmer_num - 1];
- kmerAnd ( &read_edge, &KMER_FILTER );
- kmer_t2 edge_kmer = e_tmp->edge_kmer;
-
- if ( edge_len > read_edge_len )
- {
- kmerMoveRight ( &edge_kmer, ( edge_len - read_edge_len ) );
- }
-
- kmerAnd ( &edge_kmer, &KMER_FILTER );
-
- if ( kmerCompare ( &read_edge, &edge_kmer ) == 0 )
- {
- right_found++;
- right_id = e_tmp->edge_id;
-
- if ( right_found == 2 )
- {
- //debug_build<<"can't distinct which right edge\n";
- break;
- }
- }
- }
-
- e_tmp = e_tmp->next;
- }
-
- //update map_len control
- /*
- if(edge_len >= shortest_maplen){
- if(map_len < shortest_maplen) right_found = 0;
- }else{
- if(map_len != edge_len) right_found = 0;
- }*/
-
- if ( right_found != 1 ) {left_found = 0; right_found = 0; continue;}
-
- //todo : aln if right_found = 0 ... find the best
- //if(left_found == 1 && right_found ==1)
- //store this preArc
- //preArc_array *arc_arr
- put_preArc_threaded ( arc_arr, locks, left_id, right_id, 1 );
-
- //constructing the path ...
- if ( solve )
- {
- if ( counter == 0 )
- {
- counter = 2;
- path[1] = left_id;
- path[2] = right_id;
- }
- else if ( counter <= 100 )
- {
- if ( path[counter] == left_id )
- {
- path[++counter] = right_id;
- }
- else
- {
- path[++counter] = left_id;
- path[++counter] = right_id;
- }
- }
- }
-
- //end ...
- left_found = 0;
- right_found = 0;
- }
- }
-
- //add to path buffer , if full filled ,output it
- if ( solve )
- {
- if ( counter >= 3 && counter <= 100 )
- {
- path[0] = counter;
- int tmp = is_full ( path_buffer[thread_id] );
-
- if ( tmp == 1 )
- {
- //output it
- output_edge_path_buffer_locked ( path_buffer[thread_id], path_fp, &file_lock );
- }
- else if ( tmp == -1 )
- {
- //error status
- fprintf ( stderr, "ERROR: path buffer overflow!! system exit .\n" );
- exit ( -1 );
- }
-
- put_path_2_buffer ( path_buffer[thread_id], path );
- }
- }
+ const int BUFF_LEN = 1024;
+ kmer_t2 kmers[BUFF_LEN];
+ int kmer_array_len = cut_off_len - K_size + 1;
+ int kmer_num ;
+ vertex2 *v_tmp;
+ edge_starter2 *e_tmp;
+ size_t left_id;
+ size_t right_id;
+ int left_found = 0, right_found = 0;
+ int edge_len;
+ //update
+ //int map_len;
+ //int shortest_maplen = 0;
+ //add for -R solving tiny repeats
+ unsigned int path[128];
+ unsigned int counter = 0;
+ //int read_len,i=0;
+ int read_len = strlen ( read );
+ /*
+ while(read[i]!='\0'){
+ i++;
+ }
+ read_len = i;
+ //read_len = strlen(read);
+ if(read[read_len-1]=='\n'){
+ read[read_len-1]='\0';
+ read_len--;
+ }*/
+
+ if ( read_len > cut_off_len )
+ {
+ read_len = cut_off_len;
+ }
+
+ kmer_array_len = read_len - K_size + 1;
+ chop_kmers ( read, read_len, K_size, kmers, kmer_array_len, kmer_num );
+
+ for ( int i = 1; i < kmer_num - 1; ++i ) //search every kmer exclude the begin and end kmer
+ {
+ v_tmp = search_vertex ( v_ht, &kmers[i] );
+
+ if ( v_tmp ) //found
+ {
+ //search left edge kmer got left id
+ e_tmp = v_tmp->left;
+
+ while ( e_tmp )
+ {
+ edge_len = e_tmp->len;
+
+ if ( edge_len <= i )
+ {
+ if ( kmerCompare ( & ( kmers[i - edge_len] ), & ( e_tmp->edge_kmer ) ) == 0 )
+ {
+ left_id = e_tmp->edge_id;
+
+ if ( left_found )
+ {
+ fprintf ( stderr, "ERROR: left edge id found already !new found id %llu \n", left_id );
+ fprintf ( stderr, "i:%d ,edge_len:%d\n", i, edge_len );
+ printKmerSeq ( & ( kmers[i - edge_len] ), K_size, stderr );
+ printKmerSeq ( & ( e_tmp->edge_kmer ), K_size, stderr );
+ exit ( 1 );
+ };
+
+ left_found = 1;
+
+ break;
+ }
+ }
+ else
+ {
+ kmer_t2 read_edge = kmers[0];
+
+ if ( K_size > i )
+ {
+ kmerMoveRight ( &read_edge, K_size - i );
+ }
+
+ kmer_t2 KMER_FILTER;
+ initKmerFilter ( i, &KMER_FILTER );
+ kmer_t2 edge_kmer = e_tmp->edge_kmer;
+
+ if ( K_size > edge_len )
+ {
+ kmerMoveRight ( &edge_kmer, K_size - edge_len );
+ }
+
+ kmerAnd ( &read_edge, &KMER_FILTER );
+ kmerAnd ( &edge_kmer, &KMER_FILTER );
+
+ if ( kmerCompare ( &read_edge, &edge_kmer ) == 0 )
+ {
+ left_found++;
+ left_id = e_tmp->edge_id;
+
+ if ( left_found == 2 )
+ {
+ //debug_build<<"can't distinct which left edge\n";
+ break;
+ }
+ }
+ }
+
+ e_tmp = e_tmp->next;
+ }
+
+ //update maplen_control
+ /*
+ if(edge_len >= shortest_maplen){
+ if(map_len < shortest_maplen) left_found = 0;
+ }else{
+ if(map_len != edge_len) left_found = 0;
+ }*/
+
+ if ( left_found != 1 )
+ {
+ left_found = 0; //not found or multi found
+ right_found = 0;
+ continue;
+ }
+
+ //todo : aln if left_found = 0 ... find the best
+ //search right edge kmer got right id
+ e_tmp = v_tmp->right;
+
+ while ( e_tmp )
+ {
+ edge_len = e_tmp->len;
+
+ if ( edge_len <= kmer_num - 1 - i )
+ {
+ if ( kmerCompare ( & ( kmers[i + edge_len] ), & ( e_tmp->edge_kmer ) ) == 0 )
+ {
+ right_id = e_tmp->edge_id;
+
+ if ( right_found )
+ {
+ fprintf ( stderr, "ERROR: right edge id found already, new found id %llu !\n", right_id );
+ fprintf ( stderr, "i:%d ,edge_len:%d\n", i, edge_len );
+ printKmerSeq ( & ( kmers[i + edge_len] ), K_size, stderr );
+ printKmerSeq ( & ( e_tmp->edge_kmer ), K_size, stderr );
+ exit ( 1 );
+ };
+
+ right_found = 1;
+
+ break;
+ }
+ }
+ else
+ {
+ int read_edge_len = ( kmer_num - 1 - i );
+ kmer_t2 KMER_FILTER;
+ initKmerFilter ( read_edge_len, &KMER_FILTER );
+ kmer_t2 read_edge = kmers[kmer_num - 1];
+ kmerAnd ( &read_edge, &KMER_FILTER );
+ kmer_t2 edge_kmer = e_tmp->edge_kmer;
+
+ if ( edge_len > read_edge_len )
+ {
+ kmerMoveRight ( &edge_kmer, ( edge_len - read_edge_len ) );
+ }
+
+ kmerAnd ( &edge_kmer, &KMER_FILTER );
+
+ if ( kmerCompare ( &read_edge, &edge_kmer ) == 0 )
+ {
+ right_found++;
+ right_id = e_tmp->edge_id;
+
+ if ( right_found == 2 )
+ {
+ //debug_build<<"can't distinct which right edge\n";
+ break;
+ }
+ }
+ }
+
+ e_tmp = e_tmp->next;
+ }
+
+ //update map_len control
+ /*
+ if(edge_len >= shortest_maplen){
+ if(map_len < shortest_maplen) right_found = 0;
+ }else{
+ if(map_len != edge_len) right_found = 0;
+ }*/
+
+ if ( right_found != 1 )
+ {
+ left_found = 0;
+ right_found = 0;
+ continue;
+ }
+
+ //todo : aln if right_found = 0 ... find the best
+ //if(left_found == 1 && right_found ==1)
+ //store this preArc
+ //preArc_array *arc_arr
+ put_preArc_threaded ( arc_arr, locks, left_id, right_id, 1 );
+
+ //constructing the path ...
+ if ( solve )
+ {
+ if ( counter == 0 )
+ {
+ counter = 2;
+ path[1] = left_id;
+ path[2] = right_id;
+ }
+ else if ( counter <= 100 )
+ {
+ if ( path[counter] == left_id )
+ {
+ path[++counter] = right_id;
+ }
+ else
+ {
+ path[++counter] = left_id;
+ path[++counter] = right_id;
+ }
+ }
+ }
+
+ //end ...
+ left_found = 0;
+ right_found = 0;
+ }
+ }
+
+ //add to path buffer , if full filled ,output it
+ if ( solve )
+ {
+ if ( counter >= 3 && counter <= 100 )
+ {
+ path[0] = counter;
+ int tmp = is_full ( path_buffer[thread_id] );
+
+ if ( tmp == 1 )
+ {
+ //output it
+ output_edge_path_buffer_locked ( path_buffer[thread_id], path_fp, &file_lock );
+ }
+ else if ( tmp == -1 )
+ {
+ //error status
+ fprintf ( stderr, "ERROR: path buffer overflow!! system exit .\n" );
+ exit ( -1 );
+ }
+
+ put_path_2_buffer ( path_buffer[thread_id], path );
+ }
+ }
}
-void free_preArc_array ( preArc_array * arc_array )
+void free_preArc_array ( preArc_array *arc_array )
{
- preArc * tmp, *tmp2;
+ preArc *tmp, *tmp2;
- for ( size_t i = 0; i < arc_array->array_sz; ++i )
- {
- tmp = ( arc_array->store_pos ) [i];
+ for ( size_t i = 0; i < arc_array->array_sz; ++i )
+ {
+ tmp = ( arc_array->store_pos ) [i];
- while ( tmp )
- {
- tmp2 = tmp;
- tmp = tmp->next;
- free ( tmp2 );
- }
- }
+ while ( tmp )
+ {
+ tmp2 = tmp;
+ tmp = tmp->next;
+ free ( tmp2 );
+ }
+ }
- free ( arc_array->store_pos );
+ free ( arc_array->store_pos );
}
@@ -1012,84 +1040,87 @@ Output:
Return:
None.
*************************************************/
-void build_preArc_threaded ( preArc_array * arc_arr, vertex_hash2 * v_ht, int K_size, int cut_off_len, vector<string> *in_filenames_vt, int thread_num )
+void build_preArc_threaded ( preArc_array *arc_arr, vertex_hash2 *v_ht, int K_size, int cut_off_len, vector<string> *in_filenames_vt, int thread_num )
{
- //create main io thread
- int read_buf_sz = 102400 * thrd_num_s;
- read_buf0 = new string[read_buf_sz];
- read_buf1 = new string[read_buf_sz];
- io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
- io_stat1 = 1;
- io_ready = 0;
- io_para_main io_para_mains;
- io_para_mains.read_buf_sz = read_buf_sz;
- io_para_mains.in_filenames_vt = in_filenames_vt;
- pthread_t io_thread;
- int temp;
-
- //fprintf(stderr,"Creating main io thread ...\n");
- if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
- {
- fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
- exit ( -1 );
- }
-
- fprintf ( stderr, "1 io thread initialized.\n" );
- //create work threads ..
- //fprintf(stderr,"Creating work threads ...\n");
- pthread_t threads[thrd_num_s];
- unsigned char thrdSignal[thrd_num_s + 1];
- PARAMETER paras[thrd_num_s];
- locks = ( pthread_spinlock_t * ) calloc ( arc_arr->array_sz, sizeof ( pthread_spinlock_t ) );
-
- //init as unlock stat ..
- for ( size_t i = 0; i < arc_arr->array_sz; ++i )
- {
- locks[i] = 1;
- }
-
- for ( int k = 0; k < thrd_num_s; k++ )
- {
- thrdSignal[k + 1] = 0;
- paras[k].threadID = k;
- paras[k].mainSignal = &thrdSignal[0];
- paras[k].selfSignal = &thrdSignal[k + 1];
- paras[k].ht = NULL;
- paras[k].preArcs = arc_arr;
- paras[k].v_ht = v_ht;
- paras[k].cut_off_len = cut_off_len;
- paras[k].K_size = K_size;
- paras[k].gap = gap;
- }
-
- creatThrds ( threads, paras );
- thrdSignal[0] = 0;
-
- //run it
- while ( 1 )
- {
- sendIOWorkSignal();
-
- while ( io_ready == 0 ) {usleep ( 1 );}
-
- if ( io_ready )
- {
- sendWorkSignal ( 12, thrdSignal );
- }
-
- if ( io_ready == 2 )
- {
- //fprintf(stderr,"All reads have been processed!\n");
- break;
- }
- }
-
- sendWorkSignal ( 3, thrdSignal );
- thread_wait ( threads );
- delete [] read_buf0;
- delete [] read_buf1;
- free ( ( void * ) locks );
- free_vertex_hash ( v_ht );
+ //create main io thread
+ int read_buf_sz = 102400 * thrd_num_s;
+ read_buf0 = new string[read_buf_sz];
+ read_buf1 = new string[read_buf_sz];
+ io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
+ io_stat1 = 1;
+ io_ready = 0;
+ io_para_main io_para_mains;
+ io_para_mains.read_buf_sz = read_buf_sz;
+ io_para_mains.in_filenames_vt = in_filenames_vt;
+ pthread_t io_thread;
+ int temp;
+
+ //fprintf(stderr,"Creating main io thread ...\n");
+ if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
+ {
+ fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
+ exit ( -1 );
+ }
+
+ fprintf ( stderr, "1 io thread initialized.\n" );
+ //create work threads ..
+ //fprintf(stderr,"Creating work threads ...\n");
+ pthread_t threads[thrd_num_s];
+ unsigned char thrdSignal[thrd_num_s + 1];
+ PARAMETER paras[thrd_num_s];
+ locks = ( pthread_spinlock_t * ) calloc ( arc_arr->array_sz, sizeof ( pthread_spinlock_t ) );
+
+ //init as unlock stat ..
+ for ( size_t i = 0; i < arc_arr->array_sz; ++i )
+ {
+ locks[i] = 1;
+ }
+
+ for ( int k = 0; k < thrd_num_s; k++ )
+ {
+ thrdSignal[k + 1] = 0;
+ paras[k].threadID = k;
+ paras[k].mainSignal = &thrdSignal[0];
+ paras[k].selfSignal = &thrdSignal[k + 1];
+ paras[k].ht = NULL;
+ paras[k].preArcs = arc_arr;
+ paras[k].v_ht = v_ht;
+ paras[k].cut_off_len = cut_off_len;
+ paras[k].K_size = K_size;
+ paras[k].gap = gap;
+ }
+
+ creatThrds ( threads, paras );
+ thrdSignal[0] = 0;
+
+ //run it
+ while ( 1 )
+ {
+ sendIOWorkSignal();
+
+ while ( io_ready == 0 )
+ {
+ usleep ( 1 );
+ }
+
+ if ( io_ready )
+ {
+ sendWorkSignal ( 12, thrdSignal );
+ }
+
+ if ( io_ready == 2 )
+ {
+ //fprintf(stderr,"All reads have been processed!\n");
+ break;
+ }
+ }
+
+ sendWorkSignal ( 3, thrdSignal );
+ thread_wait ( threads );
+ delete [] read_buf0;
+ delete [] read_buf1;
+ free ( ( void * ) locks );
+ free_vertex_hash ( v_ht );
}
@@ -1105,35 +1136,35 @@ Output:
Return:
an edge_path_buffer pointer to heap
*************************************************/
-edge_path_buffer * create_edge_path_buffer
-( unsigned int * mark_on_edge,
- pthread_spinlock_t * locks,
+edge_path_buffer *create_edge_path_buffer
+( unsigned int *mark_on_edge,
+ pthread_spinlock_t *locks,
unsigned long long buff_size,
unsigned int max_path_length )
{
- if ( ! ( mark_on_edge && locks ) )
- {
- fprintf ( stderr, "ERROR: The initial mark_on_edge array or locks are not valid! Exit System ...\n" );
- exit ( -1 );
- }
-
- edge_path_buffer * new_buffer = ( edge_path_buffer * ) calloc ( 1, sizeof ( edge_path_buffer ) );
- new_buffer->mark_on_edge = mark_on_edge;
- new_buffer->locks = locks;
- new_buffer->buff_size = buff_size;
- new_buffer->max_path_length = max_path_length;
- new_buffer->filled_num = 0;
- new_buffer->path_buffer = NULL;
- unsigned int ** tmp;
- tmp = ( unsigned int ** ) calloc ( buff_size, sizeof ( unsigned int * ) );
-
- for ( size_t i = 0; i < buff_size; i++ )
- {
- tmp[i] = ( unsigned int * ) calloc ( max_path_length, sizeof ( unsigned int ) );
- }
-
- new_buffer->path_buffer = tmp;
- return new_buffer;
+ if ( ! ( mark_on_edge && locks ) )
+ {
+ fprintf ( stderr, "ERROR: The initial mark_on_edge array or locks are not valid! Exit System ...\n" );
+ exit ( -1 );
+ }
+
+ edge_path_buffer *new_buffer = ( edge_path_buffer * ) calloc ( 1, sizeof ( edge_path_buffer ) );
+ new_buffer->mark_on_edge = mark_on_edge;
+ new_buffer->locks = locks;
+ new_buffer->buff_size = buff_size;
+ new_buffer->max_path_length = max_path_length;
+ new_buffer->filled_num = 0;
+ new_buffer->path_buffer = NULL;
+ unsigned int **tmp;
+ tmp = ( unsigned int ** ) calloc ( buff_size, sizeof ( unsigned int * ) );
+
+ for ( size_t i = 0; i < buff_size; i++ )
+ {
+ tmp[i] = ( unsigned int * ) calloc ( max_path_length, sizeof ( unsigned int ) );
+ }
+
+ new_buffer->path_buffer = tmp;
+ return new_buffer;
}
/*************************************************
@@ -1148,58 +1179,58 @@ Output:
Return:
None
*************************************************/
-void destory_edge_path_buffer ( struct edge_path_buffer * buffer )
+void destory_edge_path_buffer ( struct edge_path_buffer *buffer )
{
- unsigned int ** tmp = buffer->path_buffer;
+ unsigned int **tmp = buffer->path_buffer;
- for ( size_t i = 0; i < buffer->buff_size; i++ )
- {
- free ( ( void * ) ( tmp[i] ) );
- }
+ for ( size_t i = 0; i < buffer->buff_size; i++ )
+ {
+ free ( ( void * ) ( tmp[i] ) );
+ }
- free ( ( void * ) tmp );
- buffer->filled_num = 0;
+ free ( ( void * ) tmp );
+ buffer->filled_num = 0;
}
-void clear_edge_path_buffer ( struct edge_path_buffer * buffer )
+void clear_edge_path_buffer ( struct edge_path_buffer *buffer )
{
- unsigned int ** tmp = buffer->path_buffer;
+ unsigned int **tmp = buffer->path_buffer;
- for ( size_t i = 0; i < buffer->buff_size; i++ )
- {
- memset ( tmp[i], 0, buffer->max_path_length * sizeof ( unsigned int ) );
- }
+ for ( size_t i = 0; i < buffer->buff_size; i++ )
+ {
+ memset ( tmp[i], 0, buffer->max_path_length * sizeof ( unsigned int ) );
+ }
- buffer->filled_num = 0;
+ buffer->filled_num = 0;
}
-void output_edge_path_buffer ( struct edge_path_buffer * buffer, FILE * path_file )
+void output_edge_path_buffer ( struct edge_path_buffer *buffer, FILE *path_file )
{
- if ( debug )
- {
- static size_t times = 0, total = 0;
- total += buffer->filled_num;
- fprintf ( stderr, "call output_edge_path_buffer %lu %lu\n", times++, total );
- }
-
- if ( !path_file )
- {
- fprintf ( stderr, "ERROR: The path_file is not avilable!\n" );
- exit ( -1 );
- }
-
- unsigned int counter;
- unsigned int ** tmp = buffer->path_buffer;
-
- for ( size_t i = 0; i < buffer->filled_num; i++ )
- {
- counter = tmp[i][0];
- fwrite ( &counter, sizeof ( char ), 1, path_file );
- fwrite ( tmp[i] + 1, sizeof ( unsigned int ), ( int ) counter, path_file );
- }
-
- buffer->filled_num = 0;
+ if ( debug )
+ {
+ static size_t times = 0, total = 0;
+ total += buffer->filled_num;
+ fprintf ( stderr, "call output_edge_path_buffer %lu %lu\n", times++, total );
+ }
+
+ if ( !path_file )
+ {
+ fprintf ( stderr, "ERROR: The path_file is not avilable!\n" );
+ exit ( -1 );
+ }
+
+ unsigned int counter;
+ unsigned int **tmp = buffer->path_buffer;
+
+ for ( size_t i = 0; i < buffer->filled_num; i++ )
+ {
+ counter = tmp[i][0];
+ fwrite ( &counter, sizeof ( char ), 1, path_file );
+ fwrite ( tmp[i] + 1, sizeof ( unsigned int ), ( int ) counter, path_file );
+ }
+
+ buffer->filled_num = 0;
}
@@ -1215,35 +1246,35 @@ Output:
Return:
None.
*************************************************/
-void output_edge_path_buffer_locked ( struct edge_path_buffer * buffer, FILE * path_file, pthread_mutex_t * file_mutex )
+void output_edge_path_buffer_locked ( struct edge_path_buffer *buffer, FILE *path_file, pthread_mutex_t *file_mutex )
{
- static size_t times = 0, total = 0;;
-
- if ( !path_file )
- {
- fprintf ( stderr, "ERROR: The path_file is not avilable!\n" );
- exit ( -1 );
- }
-
- unsigned int counter;
- unsigned int ** tmp = buffer->path_buffer;
- pthread_mutex_lock ( file_mutex );
-
- if ( debug )
- {
- total += buffer->filled_num;
- fprintf ( stderr, "call output_edge_path_buffer_locked %lu %lu\n", times++, total );
- }
-
- for ( size_t i = 0; i < buffer->filled_num; i++ )
- {
- counter = tmp[i][0];
- fwrite ( &counter, sizeof ( char ), 1, path_file );
- fwrite ( tmp[i] + 1, sizeof ( unsigned int ), ( int ) counter, path_file );
- }
-
- pthread_mutex_unlock ( file_mutex );
- buffer->filled_num = 0;
+ static size_t times = 0, total = 0;;
+
+ if ( !path_file )
+ {
+ fprintf ( stderr, "ERROR: The path_file is not avilable!\n" );
+ exit ( -1 );
+ }
+
+ unsigned int counter;
+ unsigned int **tmp = buffer->path_buffer;
+ pthread_mutex_lock ( file_mutex );
+
+ if ( debug )
+ {
+ total += buffer->filled_num;
+ fprintf ( stderr, "call output_edge_path_buffer_locked %lu %lu\n", times++, total );
+ }
+
+ for ( size_t i = 0; i < buffer->filled_num; i++ )
+ {
+ counter = tmp[i][0];
+ fwrite ( &counter, sizeof ( char ), 1, path_file );
+ fwrite ( tmp[i] + 1, sizeof ( unsigned int ), ( int ) counter, path_file );
+ }
+
+ pthread_mutex_unlock ( file_mutex );
+ buffer->filled_num = 0;
}
@@ -1260,59 +1291,59 @@ Output:
Return:
None.
*************************************************/
-int put_path_2_buffer ( struct edge_path_buffer * buffer, unsigned int * path )
+int put_path_2_buffer ( struct edge_path_buffer *buffer, unsigned int *path )
{
- if ( debug )
- {
- static size_t times = 0;
- static pthread_spinlock_t lock = 1;
- pthread_spin_lock ( &lock );
- fprintf ( stderr, "call put_path_2_buffer %lu\n", times++ );
- pthread_spin_unlock ( &lock );
- }
-
- unsigned long long pos = buffer->filled_num;
-
- if ( pos >= buffer->buff_size )
- {
- return -1;
- }
-
- memcpy ( ( buffer->path_buffer ) [pos], path, buffer->max_path_length * sizeof ( unsigned int ) );
-
- for ( unsigned int i = 1; i < path[0]; i++ )
- {
- pthread_spin_lock ( ( buffer->locks ) + path[i] );
- ( ( buffer->mark_on_edge ) [path[i]] ) ++;
- pthread_spin_unlock ( ( buffer->locks ) + path[i] );
- }
-
- buffer->filled_num++;
- return 1;
+ if ( debug )
+ {
+ static size_t times = 0;
+ static pthread_spinlock_t lock = 1;
+ pthread_spin_lock ( &lock );
+ fprintf ( stderr, "call put_path_2_buffer %lu\n", times++ );
+ pthread_spin_unlock ( &lock );
+ }
+
+ unsigned long long pos = buffer->filled_num;
+
+ if ( pos >= buffer->buff_size )
+ {
+ return -1;
+ }
+
+ memcpy ( ( buffer->path_buffer ) [pos], path, buffer->max_path_length * sizeof ( unsigned int ) );
+
+ for ( unsigned int i = 1; i < path[0]; i++ )
+ {
+ pthread_spin_lock ( ( buffer->locks ) + path[i] );
+ ( ( buffer->mark_on_edge ) [path[i]] ) ++;
+ pthread_spin_unlock ( ( buffer->locks ) + path[i] );
+ }
+
+ buffer->filled_num++;
+ return 1;
}
-int is_full ( struct edge_path_buffer * buffer )
+int is_full ( struct edge_path_buffer *buffer )
{
- if ( buffer->filled_num == buffer->buff_size )
- {
- return 1;
- }
- else if ( buffer->filled_num < buffer->buff_size )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ if ( buffer->filled_num == buffer->buff_size )
+ {
+ return 1;
+ }
+ else if ( buffer->filled_num < buffer->buff_size )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
-void clear_status ( struct edge_path_buffer * buffer )
+void clear_status ( struct edge_path_buffer *buffer )
{
- buffer->filled_num = 0;
+ buffer->filled_num = 0;
}
diff --git a/sparsePregraph/change.log b/sparsePregraph/change.log
deleted file mode 100644
index 99a26ff..0000000
--- a/sparsePregraph/change.log
+++ /dev/null
@@ -1,24 +0,0 @@
-1.change the edge node
-
-old:
-struct edge_node
-{
- uint64_t edge:50,edge_cov:7,len:6,used:1;
- struct edge_node *nxt_edge;
-};
-
-now:
-struct edge_node
-{
- uint64_t edge;
- uint64_t edge_cov:7,len:6,used:1,deleted:1;
- struct edge_node *nxt_edge;
-};
-
-so, the LoadGraph... function can't work when performed on an old hash data set.
-
-
-2. support bam format
-3. support -R
-4. support 127mer
-5. build vertex K_size -> gap .
\ No newline at end of file
diff --git a/sparsePregraph/convert_soapdenovo.cpp b/sparsePregraph/convert_soapdenovo.cpp
index 89852c8..168e640 100644
--- a/sparsePregraph/convert_soapdenovo.cpp
+++ b/sparsePregraph/convert_soapdenovo.cpp
@@ -1,7 +1,7 @@
/*
* convert_soapdenovo.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -46,107 +46,110 @@ Output:
Return:
None.
*************************************************/
-void convert_kmer ( kmer_t2 * sparse_kmer, int K_size )
+void convert_kmer ( kmer_t2 *sparse_kmer, int K_size )
{
- uint64_t tmp, tmp_res;
- int i, j, index, arr_sz, base;
- index = K_size / 32;
- arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
-
- for ( i = 0; i <= index; i++ )
- {
- tmp = ( sparse_kmer->kmer ) [arr_sz - 1 - i];
- tmp_res = 0;
-
- for ( j = 0; j < 32; j++ )
- {
- base = tmp & 3;
-
- switch ( base )
- {
- case 0 :
- break;
- case 1 :
- tmp_res |= ( 1LLU << 2 * j );
- break;
- case 2:
- tmp_res |= ( 3LLU << 2 * j );
- break;
- case 3:
- tmp_res |= ( 2LLU << 2 * j );
- break;
- }
-
- tmp = tmp >> 2;
- }
-
- ( sparse_kmer->kmer ) [arr_sz - 1 - i] = tmp_res;
- }
-
- /*
- uint64_t high=0,low=0;
- int chr;
- if(K_size>=33){
- for(int i=0;i<K_size-32;++i){
- chr = sparse_kmer[0] & 3;
- switch(chr){
- case 0:
- break;
- case 1:
- high|=(1LLU << 2 * i);
- break;
- case 2:
- high|=(3LLU << 2 * i);
- break;
- case 3:
- high|=(2LLU << 2 * i);
- break;
- }
-
- sparse_kmer[0] = sparse_kmer[0]>>2;
- }
-
- for(int i=0;i<32;++i){
- chr = sparse_kmer[1] & 3;
- switch(chr){
- case 0:
- break;
- case 1:
- low|=(1LLU << 2 * i);
- break;
- case 2:
- low|=(3LLU << 2 * i);
- break;
- case 3:
- low|=(2LLU << 2 * i);
- break;
- }
-
- sparse_kmer[1] = sparse_kmer[1]>>2;
- }
- }else{
- for(int i=0;i<K_size;++i){
- chr = sparse_kmer[1] & 3;
- switch(chr){
- case 0:
- break;
- case 1:
- low|=(1LLU << 2 * i);
- break;
- case 2:
- low|=(3LLU << 2 * i);
- break;
- case 3:
- low|=(2LLU << 2 * i);
- break;
- }
-
- sparse_kmer[1] = sparse_kmer[1]>>2;
- }
- }
- sparse_kmer[0] = high;
- sparse_kmer[1] = low;
- */
+ uint64_t tmp, tmp_res;
+ int i, j, index, arr_sz, base;
+ index = K_size / 32;
+ arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
+
+ for ( i = 0; i <= index; i++ )
+ {
+ tmp = ( sparse_kmer->kmer ) [arr_sz - 1 - i];
+ tmp_res = 0;
+
+ for ( j = 0; j < 32; j++ )
+ {
+ base = tmp & 3;
+
+ switch ( base )
+ {
+ case 0 :
+ break;
+
+ case 1 :
+ tmp_res |= ( 1LLU << 2 * j );
+ break;
+
+ case 2:
+ tmp_res |= ( 3LLU << 2 * j );
+ break;
+
+ case 3:
+ tmp_res |= ( 2LLU << 2 * j );
+ break;
+ }
+
+ tmp = tmp >> 2;
+ }
+
+ ( sparse_kmer->kmer ) [arr_sz - 1 - i] = tmp_res;
+ }
+
+ /*
+ uint64_t high=0,low=0;
+ int chr;
+ if(K_size>=33){
+ for(int i=0;i<K_size-32;++i){
+ chr = sparse_kmer[0] & 3;
+ switch(chr){
+ case 0:
+ break;
+ case 1:
+ high|=(1LLU << 2 * i);
+ break;
+ case 2:
+ high|=(3LLU << 2 * i);
+ break;
+ case 3:
+ high|=(2LLU << 2 * i);
+ break;
+ }
+
+ sparse_kmer[0] = sparse_kmer[0]>>2;
+ }
+
+ for(int i=0;i<32;++i){
+ chr = sparse_kmer[1] & 3;
+ switch(chr){
+ case 0:
+ break;
+ case 1:
+ low|=(1LLU << 2 * i);
+ break;
+ case 2:
+ low|=(3LLU << 2 * i);
+ break;
+ case 3:
+ low|=(2LLU << 2 * i);
+ break;
+ }
+
+ sparse_kmer[1] = sparse_kmer[1]>>2;
+ }
+ }else{
+ for(int i=0;i<K_size;++i){
+ chr = sparse_kmer[1] & 3;
+ switch(chr){
+ case 0:
+ break;
+ case 1:
+ low|=(1LLU << 2 * i);
+ break;
+ case 2:
+ low|=(3LLU << 2 * i);
+ break;
+ case 3:
+ low|=(2LLU << 2 * i);
+ break;
+ }
+
+ sparse_kmer[1] = sparse_kmer[1]>>2;
+ }
+ }
+ sparse_kmer[0] = high;
+ sparse_kmer[1] = low;
+ */
}
@@ -163,67 +166,67 @@ Output:
Return:
None.
*************************************************/
-static void fastReverseComp ( kmer_t2 * kmer2, int seq_size )
+static void fastReverseComp ( kmer_t2 *kmer2, int seq_size )
{
- int arr_sz;
- uint64_t * seq_arr;
- arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t ); //= 2 or 4
- seq_arr = kmer2->kmer;
- int tot_bits = arr_sz * 64;
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- seq_arr[i] ^= 0xAAAAAAAAAAAAAAAALLU;
- seq_arr[i] = ( ( seq_arr[i] & 0x3333333333333333 ) << 2 ) | ( ( seq_arr[i] & 0xCCCCCCCCCCCCCCCC ) >> 2 );
- seq_arr[i] = ( ( seq_arr[i] & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq_arr[i] & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
- seq_arr[i] = ( ( seq_arr[i] & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq_arr[i] & 0xFF00FF00FF00FF00 ) >> 8 );
- seq_arr[i] = ( ( seq_arr[i] & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq_arr[i] & 0xFFFF0000FFFF0000 ) >> 16 );
- seq_arr[i] = ( ( seq_arr[i] & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq_arr[i] & 0xFFFFFFFF00000000 ) >> 32 );
- }
-
- int j = 0, k = arr_sz - 1;
-
- for ( ; j < k; ++j, --k )
- {
- uint64_t temp;
- temp = seq_arr[j];
- seq_arr[j] = seq_arr[k];
- seq_arr[k] = temp;
- }
-
- R_shift_NC ( seq_arr, tot_bits - ( seq_size * 2 ), arr_sz );
+ int arr_sz;
+ uint64_t *seq_arr;
+ arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t ); //= 2 or 4
+ seq_arr = kmer2->kmer;
+ int tot_bits = arr_sz * 64;
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ seq_arr[i] ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq_arr[i] = ( ( seq_arr[i] & 0x3333333333333333 ) << 2 ) | ( ( seq_arr[i] & 0xCCCCCCCCCCCCCCCC ) >> 2 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq_arr[i] & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq_arr[i] & 0xFF00FF00FF00FF00 ) >> 8 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq_arr[i] & 0xFFFF0000FFFF0000 ) >> 16 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq_arr[i] & 0xFFFFFFFF00000000 ) >> 32 );
+ }
+
+ int j = 0, k = arr_sz - 1;
+
+ for ( ; j < k; ++j, --k )
+ {
+ uint64_t temp;
+ temp = seq_arr[j];
+ seq_arr[j] = seq_arr[k];
+ seq_arr[k] = temp;
+ }
+
+ R_shift_NC ( seq_arr, tot_bits - ( seq_size * 2 ), arr_sz );
}
struct classcomp
{
- bool operator() ( const kmer_t2 & t1, const kmer_t2 & t2 ) const
- {
- int Kmer_arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
-
- for ( int jj = 0; jj < Kmer_arr_sz; ++jj )
- {
- if ( ( t1.kmer ) [jj] < ( t2.kmer ) [jj] )
- {
- return 1;
- }
- else if ( ( t1.kmer ) [jj] > ( t2.kmer ) [jj] )
- {
- return 0;
- }
-
- continue;
- }
-
- return 0;
- /* old
- if((t1.kmer)[0] < (t2.kmer)[0]){
- return 1;
- }else if((t1.kmer)[0] == (t2.kmer)[0]){
- return (t1.kmer)[1] < (t2.kmer)[1];
- }else{
- return 0;
- }*/
- }
+ bool operator() ( const kmer_t2 &t1, const kmer_t2 &t2 ) const
+ {
+ int Kmer_arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
+
+ for ( int jj = 0; jj < Kmer_arr_sz; ++jj )
+ {
+ if ( ( t1.kmer ) [jj] < ( t2.kmer ) [jj] )
+ {
+ return 1;
+ }
+ else if ( ( t1.kmer ) [jj] > ( t2.kmer ) [jj] )
+ {
+ return 0;
+ }
+
+ continue;
+ }
+
+ return 0;
+ /* old
+ if((t1.kmer)[0] < (t2.kmer)[0]){
+ return 1;
+ }else if((t1.kmer)[0] == (t2.kmer)[0]){
+ return (t1.kmer)[1] < (t2.kmer)[1];
+ }else{
+ return 0;
+ }*/
+ }
};
@@ -242,209 +245,218 @@ Output:
Return:
None.
*************************************************/
-void convert ( char * sparse_edge_file, int K_size, char * output_prefix )
+void convert ( char *sparse_edge_file, int K_size, char *output_prefix )
{
- if ( run_mode != 0 )
- {
- char temp[256];
- sprintf ( temp, "%s.preGraphBasic", output_prefix );
- FILE * fp = fopen ( temp, "r" );
- char line[1024];
- fgets ( line, 1024, fp );
- fgets ( line, 1024, fp );
- fgets ( line, 1024, fp );
- fclose ( fp );
- sscanf ( line, "%s %d %s %d", temp, &max_rd_len, temp, &min_rd_len );
- }
-
- FILE * fin, *fout2, *fout3;
- fin = fopen ( sparse_edge_file, "r" );
- gzFile fout;
- char temp[256];
- sprintf ( temp, "%s.edge.gz", output_prefix );
- fout = gzopen ( temp, "w" );
- //fout= fopen(temp, "w");//edge
- //write as gzip file
- sprintf ( temp, "%s.vertex", output_prefix );
- fout2 = fopen ( temp, "w" );
- sprintf ( temp, "%s.preGraphBasic", output_prefix );
- fout3 = fopen ( temp, "w" );
-
- if ( !fin || !fout || !fout2 || !fout3 )
- {
- fprintf ( stderr, "can't open file %s\n", sparse_edge_file );
- exit ( 1 );
- }
-
- //cout << "right 0"<<endl;
- kmer_t2 from_kmer, to_kmer;
- size_t line_len, edge_len_left;
- int edge_len;
- int cvg;
- int bal_ed;//[2 GBK characters lost in transit]为0 (i.e. "... is 0")
- char str[32];
- const int BUFF_LEN = 1024;
- char line[BUFF_LEN];
- int start = 0;
- int cutoff = 100;
- map<kmer_t2, int, classcomp> vertex_nodes;
- size_t edge_counter = 0, vertex_counter = 0;
- int j = 0;
-
- //cout << "right 1"<<endl;
-
- while ( fgets ( line, BUFF_LEN, fin ) != NULL )
- {
- //cout << "right 2"<<endl;
- if ( line[0] == '>' ) //get one edge length, from vertex, to vertex,cvg,bal
- {
- edge_counter++;
+ if ( run_mode != 0 )
+ {
+ char temp[256];
+ sprintf ( temp, "%s.preGraphBasic", output_prefix );
+ FILE *fp = fopen ( temp, "r" );
+ char line[1024];
+ fgets ( line, 1024, fp );
+ fgets ( line, 1024, fp );
+ fgets ( line, 1024, fp );
+ fclose ( fp );
+ sscanf ( line, "%s %d %s %d", temp, &max_rd_len, temp, &min_rd_len );
+ }
+
+ FILE *fin, *fout2, *fout3;
+ fin = fopen ( sparse_edge_file, "r" );
+ gzFile fout;
+ char temp[256];
+ sprintf ( temp, "%s.edge.gz", output_prefix );
+ fout = gzopen ( temp, "w" );
+ //fout= fopen(temp, "w");//edge
+ //write as gzip file
+ sprintf ( temp, "%s.vertex", output_prefix );
+ fout2 = fopen ( temp, "w" );
+ sprintf ( temp, "%s.preGraphBasic", output_prefix );
+ fout3 = fopen ( temp, "w" );
+
+ if ( !fin || !fout || !fout2 || !fout3 )
+ {
+ fprintf ( stderr, "can't open file %s\n", sparse_edge_file );
+ exit ( 1 );
+ }
+
+ //cout << "right 0"<<endl;
+ kmer_t2 from_kmer, to_kmer;
+ size_t line_len, edge_len_left;
+ int edge_len;
+ int cvg;
+ int bal_ed;//[2 GBK characters lost in transit]为0 (i.e. "... is 0")
+ char str[32];
+ const int BUFF_LEN = 1024;
+ char line[BUFF_LEN];
+ int start = 0;
+ int cutoff = 100;
+ map<kmer_t2, int, classcomp> vertex_nodes;
+ size_t edge_counter = 0, vertex_counter = 0;
+ int j = 0;
+
+ //cout << "right 1"<<endl;
+
+ while ( fgets ( line, BUFF_LEN, fin ) != NULL )
+ {
+ //cout << "right 2"<<endl;
+ if ( line[0] == '>' ) //get one edge length, from vertex, to vertex,cvg,bal
+ {
+ edge_counter++;
#ifdef _63MER_
- sscanf ( line + 7, "%d,%llx %llx,%llx %llx,cvg %d,%d", &edge_len,
- & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ sscanf ( line + 7, "%d,%llx %llx,%llx %llx,cvg %d,%d", &edge_len,
+ & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
#ifdef _127MER_
- sscanf ( line + 7, "%d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d",
- &edge_len, & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
- & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ sscanf ( line + 7, "%d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d",
+ &edge_len, & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
+ & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
- if ( edge_len == 1 )
- {
- cvg = 0;
- }
- else
- {
- cvg *= 10;
- }
-
- convert_kmer ( &from_kmer, K_size );
- convert_kmer ( &to_kmer, K_size );
+ if ( edge_len == 1 )
+ {
+ cvg = 0;
+ }
+ else
+ {
+ cvg *= 10;
+ }
+
+ convert_kmer ( &from_kmer, K_size );
+ convert_kmer ( &to_kmer, K_size );
#ifdef _63MER_
- gzprintf ( fout, ">length %d,%llx %llx,%llx %llx,cvg %d,%d\n", edge_len,
- ( from_kmer.kmer ) [0], ( from_kmer.kmer ) [1], ( to_kmer.kmer ) [0], ( to_kmer.kmer ) [1], cvg, bal_ed );
+ gzprintf ( fout, ">length %d,%llx %llx,%llx %llx,cvg %d,%d\n", edge_len,
+ ( from_kmer.kmer ) [0], ( from_kmer.kmer ) [1], ( to_kmer.kmer ) [0], ( to_kmer.kmer ) [1], cvg, bal_ed );
#endif
#ifdef _127MER_
- gzprintf ( fout, ">length %d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d\n", edge_len,
- ( from_kmer.kmer ) [0], ( from_kmer.kmer ) [1], ( from_kmer.kmer ) [2], ( from_kmer.kmer ) [3],
- ( to_kmer.kmer ) [0], ( to_kmer.kmer ) [1], ( to_kmer.kmer ) [2], ( to_kmer.kmer ) [3], cvg, bal_ed );
+ gzprintf ( fout, ">length %d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d\n", edge_len,
+ ( from_kmer.kmer ) [0], ( from_kmer.kmer ) [1], ( from_kmer.kmer ) [2], ( from_kmer.kmer ) [3],
+ ( to_kmer.kmer ) [0], ( to_kmer.kmer ) [1], ( to_kmer.kmer ) [2], ( to_kmer.kmer ) [3], cvg, bal_ed );
#endif
- if ( bal_ed ) { edge_counter++; }
-
- kmer_t2 f_kmer = from_kmer;
- fastReverseComp ( &f_kmer, K_size );
-
- if ( kmerCompare ( &f_kmer, &from_kmer ) < 0 )
- {
- from_kmer = f_kmer;
- }
-
- vertex_nodes[from_kmer]++;
- f_kmer = to_kmer;
- fastReverseComp ( &f_kmer, K_size );
-
- if ( kmerCompare ( &f_kmer, &to_kmer ) < 0 )
- {
- to_kmer = f_kmer;
- }
-
- vertex_nodes[to_kmer]++;
- start = 1;
- j = 0;
- }
- else
- {
- //print the sequence
- if ( start == 1 )
- {
- //skip the first kmer
- int len = strlen ( line );
-
- if ( line[len - 1] == '\n' )
- {
- line[len - 1] == '\0';
- len --;
- }
-
- for ( int i = K_size; i < len; i++ )
- {
- j++;
- gzprintf ( fout, "%c", line[i] );
-
- if ( j % 100 == 0 )
- {
- gzprintf ( fout, "\n" );
- }
- }
-
- edge_len -= ( len - K_size );
-
- if ( edge_len == 0 && j % 100 != 0 )
- {
- gzprintf ( fout, "\n" );
- }
-
- start = 2;
- }
- else //start = 2
- {
- if ( line[0] == '\n' ) { continue; }
-
- int len = strlen ( line );
-
- if ( line[len - 1] == '\n' )
- {
- line[len - 1] == '\0';
- len --;
- }
-
- for ( int i = 0; i < len; i++ )
- {
- j++;
- gzprintf ( fout, "%c", line[i] );
-
- if ( j % 100 == 0 )
- {
- gzprintf ( fout, "\n" );
- }
- }
-
- edge_len -= len;
-
- if ( edge_len == 0 && j % 100 != 0 )
- {
- gzprintf ( fout, "\n" );
- }
- }
- }
- }
-
- //fprintf(stderr,"size of map: %llu\n",vertex_nodes.size());
- map<kmer_t2, int>::iterator it;
-
- for ( it = vertex_nodes.begin(); it != vertex_nodes.end(); ++it )
- {
- vertex_counter++;
+ if ( bal_ed )
+ {
+ edge_counter++;
+ }
+
+ kmer_t2 f_kmer = from_kmer;
+ fastReverseComp ( &f_kmer, K_size );
+
+ if ( kmerCompare ( &f_kmer, &from_kmer ) < 0 )
+ {
+ from_kmer = f_kmer;
+ }
+
+ vertex_nodes[from_kmer]++;
+ f_kmer = to_kmer;
+ fastReverseComp ( &f_kmer, K_size );
+
+ if ( kmerCompare ( &f_kmer, &to_kmer ) < 0 )
+ {
+ to_kmer = f_kmer;
+ }
+
+ vertex_nodes[to_kmer]++;
+ start = 1;
+ j = 0;
+ }
+ else
+ {
+ //print the sequence
+ if ( start == 1 )
+ {
+ //skip the first kmer
+ int len = strlen ( line );
+
+ if ( line[len - 1] == '\n' )
+ {
+ line[len - 1] == '\0';
+ len --;
+ }
+
+ for ( int i = K_size; i < len; i++ )
+ {
+ j++;
+ gzprintf ( fout, "%c", line[i] );
+
+ if ( j % 100 == 0 )
+ {
+ gzprintf ( fout, "\n" );
+ }
+ }
+
+ edge_len -= ( len - K_size );
+
+ if ( edge_len == 0 && j % 100 != 0 )
+ {
+ gzprintf ( fout, "\n" );
+ }
+
+ start = 2;
+ }
+ else //start = 2
+ {
+ if ( line[0] == '\n' )
+ {
+ continue;
+ }
+
+ int len = strlen ( line );
+
+ if ( line[len - 1] == '\n' )
+ {
+ line[len - 1] == '\0';
+ len --;
+ }
+
+ for ( int i = 0; i < len; i++ )
+ {
+ j++;
+ gzprintf ( fout, "%c", line[i] );
+
+ if ( j % 100 == 0 )
+ {
+ gzprintf ( fout, "\n" );
+ }
+ }
+
+ edge_len -= len;
+
+ if ( edge_len == 0 && j % 100 != 0 )
+ {
+ gzprintf ( fout, "\n" );
+ }
+ }
+ }
+ }
+
+ //fprintf(stderr,"size of map: %llu\n",vertex_nodes.size());
+ map<kmer_t2, int>::iterator it;
+
+ for ( it = vertex_nodes.begin(); it != vertex_nodes.end(); ++it )
+ {
+ vertex_counter++;
#ifdef _63MER_
- fprintf ( fout2, "%llx %llx ", ( ( *it ).first.kmer ) [0], ( ( *it ).first.kmer ) [1] );
+ fprintf ( fout2, "%llx %llx ", ( ( *it ).first.kmer ) [0], ( ( *it ).first.kmer ) [1] );
#endif
#ifdef _127MER_
- fprintf ( fout2, "%llx %llx %llx %llx ", ( ( *it ).first.kmer ) [0], ( ( *it ).first.kmer ) [1],
- ( ( *it ).first.kmer ) [2], ( ( *it ).first.kmer ) [3] );
+ fprintf ( fout2, "%llx %llx %llx %llx ", ( ( *it ).first.kmer ) [0], ( ( *it ).first.kmer ) [1],
+ ( ( *it ).first.kmer ) [2], ( ( *it ).first.kmer ) [3] );
#endif
- if ( vertex_counter % 8 == 0 ) { fprintf ( fout2, "\n" ); }
- }
-
- fprintf ( fout3, "VERTEX %lu K %d\n", vertex_counter, K_size );
- fprintf ( fout3, "EDGEs %lu\n", edge_counter );
- fprintf ( stderr, "%llu edges and %llu vertexes constructed.\n", edge_counter, vertex_counter );
- fprintf ( fout3, "MaxReadLen %d MinReadLen %d MaxNameLen 256\n", max_rd_len, min_rd_len );
- fclose ( fin );
- gzclose ( fout );
- fclose ( fout2 );
- fclose ( fout3 );
+ if ( vertex_counter % 8 == 0 )
+ {
+ fprintf ( fout2, "\n" );
+ }
+ }
+
+ fprintf ( fout3, "VERTEX %lu K %d\n", vertex_counter, K_size );
+ fprintf ( fout3, "EDGEs %lu\n", edge_counter );
+ fprintf ( stderr, "%llu edges and %llu vertexes constructed.\n", edge_counter, vertex_counter );
+ fprintf ( fout3, "MaxReadLen %d MinReadLen %d MaxNameLen 256\n", max_rd_len, min_rd_len );
+ fclose ( fin );
+ gzclose ( fout );
+ fclose ( fout2 );
+ fclose ( fout3 );
}
diff --git a/sparsePregraph/global.cpp b/sparsePregraph/global.cpp
index 2a49afd..37139d0 100644
--- a/sparsePregraph/global.cpp
+++ b/sparsePregraph/global.cpp
@@ -1,7 +1,7 @@
/*
* global.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -36,18 +36,18 @@ int run_mode = 0;
int thrd_num_s = 8;
-size_t * edge_cnt_total = NULL; //used int lock strategy
-size_t * bucket_count_total = NULL; //used in lock strategy
+size_t *edge_cnt_total = NULL; //used int lock strategy
+size_t *bucket_count_total = NULL; //used in lock strategy
//for io thread @see io_func.h
//for io thread
-string * seq_t = NULL;
+string *seq_t = NULL;
int io_ready; //0 ready to work 1 working 2 *seq_t ready 3 end reading signal
int read_num = 0; //the read num in *seq_t
-string * read_buf0 = NULL;
-string * read_buf1 = NULL;
+string *read_buf0 = NULL;
+string *read_buf1 = NULL;
int io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
int io_stat1 = 1;
@@ -58,19 +58,19 @@ int min_rd_len = 100000;
//for the hashing lock strategy ...
-pthread_spinlock_t * locks;
+pthread_spinlock_t *locks;
// solving tiny repeats, temporarily using global vars to implements this feature
-unsigned int * mark_on_edge = NULL;
-pthread_spinlock_t * s_locks = NULL;
-struct edge_path_buffer ** path_buffer = NULL;
+unsigned int *mark_on_edge = NULL;
+pthread_spinlock_t *s_locks = NULL;
+struct edge_path_buffer **path_buffer = NULL;
unsigned long long buff_size = 1024;
unsigned int max_path_length = 128; //max_path_length-1 is the real max path length, because the first int of buffer record the path length
-FILE * mark_fp = NULL; //
-FILE * path_fp = NULL; //
+FILE *mark_fp = NULL; //
+FILE *path_fp = NULL; //
pthread_mutex_t file_lock;//
int debug = 0 ;
diff --git a/sparsePregraph/inc/build_edge.h b/sparsePregraph/inc/build_edge.h
index 2b6fa03..86b0e8d 100644
--- a/sparsePregraph/inc/build_edge.h
+++ b/sparsePregraph/inc/build_edge.h
@@ -1,7 +1,7 @@
/*
* inc/sparse_kmer.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -24,26 +24,26 @@
#define _BUILD_EDGE_H
-void removeMinorTips ( struct hashtable2 * ht, int K_size, int cut_len_tip, int & tip_c );
-void kmer2edges ( hashtable2 * ht, int K_size, char * outfile );
-void convert ( char * sparse_edge_file, int K_size, char * output_prefix );
-void RemovingWeakNodesAndEdges2 ( hashtable2 * ht, int K_size, int NodeCovTh, int EdgeCovTh, size_t * bucket_cnt, size_t * edge_cnt );
+void removeMinorTips ( struct hashtable2 *ht, int K_size, int cut_len_tip, int &tip_c );
+void kmer2edges ( hashtable2 *ht, int K_size, char *outfile );
+void convert ( char *sparse_edge_file, int K_size, char *output_prefix );
+void RemovingWeakNodesAndEdges2 ( hashtable2 *ht, int K_size, int NodeCovTh, int EdgeCovTh, size_t *bucket_cnt, size_t *edge_cnt );
struct stacked_node2
{
- struct bucket2 * node;
- bool is_left; // change it to a byte later
- struct edge_node * edge;
- struct stacked_node2 * next;
+ struct bucket2 *node;
+ bool is_left; // change it to a byte later
+ struct edge_node *edge;
+ struct stacked_node2 *next;
};
typedef struct preedge2
{
- struct stacked_node2 * from_node;
- struct stacked_node2 * to_node;
- string * full_edge;
- unsigned short cvg;
- unsigned short bal_edge: 2;
+ struct stacked_node2 *from_node;
+ struct stacked_node2 *to_node;
+ string *full_edge;
+ unsigned short cvg;
+ unsigned short bal_edge: 2;
} preEDGE2;
@@ -51,39 +51,39 @@ typedef struct preedge2
// below is static methods
//remove minor tips ...
//void removeMinorTips(struct hashtable2 *ht,int K_size,int cut_len_tip,int &tip_c);
-static void mask1in1out ( hashtable2 * ht );
-static int clipTipFromNode ( hashtable2 * ht, int K_size, bucket2 * node, int cut_len_tip );
-static int count_left_edge_num ( bucket2 * bkt );
-static int count_right_edge_num ( bucket2 * bkt );
+static void mask1in1out ( hashtable2 *ht );
+static int clipTipFromNode ( hashtable2 *ht, int K_size, bucket2 *node, int cut_len_tip );
+static int count_left_edge_num ( bucket2 *bkt );
+static int count_right_edge_num ( bucket2 *bkt );
-static void dislink ( hashtable2 * ht, int K_size, stacked_node2 * from_node );
-static bucket2 * lastKmer ( hashtable2 * ht, int K_size, bucket2 * node, edge_node * edge, int is_left, int & smaller );
+static void dislink ( hashtable2 *ht, int K_size, stacked_node2 *from_node );
+static bucket2 *lastKmer ( hashtable2 *ht, int K_size, bucket2 *node, edge_node *edge, int is_left, int &smaller );
//static bucket2* search_kmer(hashtable2 *ht,uint64_t* t_kmer, int Kmer_arr_sz); old
-static bucket2 * search_kmer ( hashtable2 * ht, kmer_t2 * t_kmer );
+static bucket2 *search_kmer ( hashtable2 *ht, kmer_t2 *t_kmer );
-static void removeEdge ( bucket2 * node, edge_node * edge, int is_left );
-static void stat_edge_num ( hashtable2 * ht );
-static void stat_edge_cvg_len ( hashtable2 * ht );
-static bool isSmaller2 ( uint64_t * kmer, int K_size );
+static void removeEdge ( bucket2 *node, edge_node *edge, int is_left );
+static void stat_edge_num ( hashtable2 *ht );
+static void stat_edge_cvg_len ( hashtable2 *ht );
+static bool isSmaller2 ( uint64_t *kmer, int K_size );
//kmer2edges ....
//void kmer2edges(hashtable2* ht,int K_size,char *outfile);
-static void make_edge ( hashtable2 * ht, int K_size, FILE * fp );
-static int startEdgeFromNode ( hashtable2 * ht, int K_size, bucket2 * node, FILE * fp );
-static void stringBeads ( hashtable2 * ht, int K_size, list<stacked_node2 *> &stack, stacked_node2 * from_node, edge_node * from_edge, int * node_c );
+static void make_edge ( hashtable2 *ht, int K_size, FILE *fp );
+static int startEdgeFromNode ( hashtable2 *ht, int K_size, bucket2 *node, FILE *fp );
+static void stringBeads ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack, stacked_node2 *from_node, edge_node *from_edge, int *node_c );
-static void process_1stack ( hashtable2 * ht, int K_size, list<stacked_node2 *> &stack, FILE * fp, vector<preEDGE2> &loops_edges );
+static void process_1stack ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack, FILE *fp, vector<preEDGE2> &loops_edges );
//static void get_kmer(const char * seq,int len, int K_size,int pos,uint64_t *kmer,int arr_sz );
-static void output_1edge ( preEDGE2 * long_edge, int K_size, FILE * fp );
-static string stack2string ( hashtable2 * ht, int K_size, list<stacked_node2 *> & stack );
-static bool check_palindrome ( string & str );
-static string revCompSeq ( const string & str );
+static void output_1edge ( preEDGE2 *long_edge, int K_size, FILE *fp );
+static string stack2string ( hashtable2 *ht, int K_size, list<stacked_node2 *> &stack );
+static bool check_palindrome ( string &str );
+static string revCompSeq ( const string &str );
//convert the edge fomat ...
//void convert(char * sparse_edge_file,int K_size, char * output_prefix);
-static void convert_kmer ( uint64_t * sparse_kmer, int K_size, int arr_sz );
-static uint64_t * fastReverseComp ( uint64_t * seq_arr, int seq_size, int arr_sz );
+static void convert_kmer ( uint64_t *sparse_kmer, int K_size, int arr_sz );
+static uint64_t *fastReverseComp ( uint64_t *seq_arr, int seq_size, int arr_sz );
#endif
diff --git a/sparsePregraph/inc/build_graph.h b/sparsePregraph/inc/build_graph.h
index 6ff5818..2c5df1c 100644
--- a/sparsePregraph/inc/build_graph.h
+++ b/sparsePregraph/inc/build_graph.h
@@ -1,7 +1,7 @@
/*
* inc/build_graph.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,10 +29,10 @@
#include "core.h"
//for called
-void run_process_threaded ( struct hashtable2 * ht, pthread_spinlock_t * locks, int K_size, int gap, size_t read_num, int thrd_num, int thrd_id, int round );
-void SwitchBuckets ( hashtable2 * ht2, int K_size );
-void SavingSparseKmerGraph2 ( hashtable2 * ht, char * outfile );
-void LoadingSparseKmerGraph2 ( hashtable2 * ht, char * outfile );
+void run_process_threaded ( struct hashtable2 *ht, pthread_spinlock_t *locks, int K_size, int gap, size_t read_num, int thrd_num, int thrd_id, int round );
+void SwitchBuckets ( hashtable2 *ht2, int K_size );
+void SavingSparseKmerGraph2 ( hashtable2 *ht, char *outfile );
+void LoadingSparseKmerGraph2 ( hashtable2 *ht, char *outfile );
#endif
diff --git a/sparsePregraph/inc/build_preArc.h b/sparsePregraph/inc/build_preArc.h
index df683ca..4276a93 100644
--- a/sparsePregraph/inc/build_preArc.h
+++ b/sparsePregraph/inc/build_preArc.h
@@ -1,7 +1,7 @@
/*
* inc/build_preArc.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -28,144 +28,144 @@
struct edge_starter2
{
- struct kmer_t2 edge_kmer;
- uint64_t edge_id: 32, len: 6; //make sure that left always be end & right always be start
- edge_starter2 * next;
+ struct kmer_t2 edge_kmer;
+ uint64_t edge_id: 32, len: 6; //make sure that left always be end & right always be start
+ edge_starter2 *next;
};
struct vertex2
{
- struct kmer_t2 kmer_t2;
- edge_starter2 * left;
- edge_starter2 * right;
- vertex2 * next;
+ struct kmer_t2 kmer_t2;
+ edge_starter2 *left;
+ edge_starter2 *right;
+ vertex2 *next;
};
struct vertex_hash2
{
- struct vertex2 ** store_pos;
- size_t ht_sz;
+ struct vertex2 **store_pos;
+ size_t ht_sz;
};
struct preArc
{
- unsigned int to_ed;
- unsigned int multiplicity;
- struct preArc * next;
+ unsigned int to_ed;
+ unsigned int multiplicity;
+ struct preArc *next;
};
struct preArc_array
{
- struct preArc ** store_pos;
- size_t array_sz;
+ struct preArc **store_pos;
+ size_t array_sz;
};
//public methods
-void init_vertex_hash ( vertex_hash2 * v_ht, size_t sz );
-void build_vertexes ( vertex_hash2 * v_ht, int K_size, char * edge_file );
-void free_vertex_hash ( vertex_hash2 * v_ht );
+void init_vertex_hash ( vertex_hash2 *v_ht, size_t sz );
+void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file );
+void free_vertex_hash ( vertex_hash2 *v_ht );
-void init_preArc_array ( preArc_array * arc_array, size_t sz );
-void build_preArc_threaded ( preArc_array * arc_arr, vertex_hash2 * v_ht, int K_size, int cut_off_len, vector<string> *in_filenames_vt, int thread_num );
-void output_preArcs ( preArc_array * arc_arr, char * outfile );
-void free_preArc_array ( preArc_array * arc_array );
+void init_preArc_array ( preArc_array *arc_array, size_t sz );
+void build_preArc_threaded ( preArc_array *arc_arr, vertex_hash2 *v_ht, int K_size, int cut_off_len, vector<string> *in_filenames_vt, int thread_num );
+void output_preArcs ( preArc_array *arc_arr, char *outfile );
+void free_preArc_array ( preArc_array *arc_array );
//local structs ...
struct io_para
{
- //char **buf0;
- //char **buf1;
- int * io_stat0;
- int * io_stat1;
- int * read_num0;
- int * read_num1;
+ //char **buf0;
+ //char **buf1;
+ int *io_stat0;
+ int *io_stat1;
+ int *read_num0;
+ int *read_num1;
- int * finished_arr0;
- int * finished_arr1;
+ int *finished_arr0;
+ int *finished_arr1;
- //FILE *fp;
+ //FILE *fp;
- vector<string> *in_filenames_vt;
+ vector<string> *in_filenames_vt;
- int read_buf_sz;
- int read_buf_len;
+ int read_buf_sz;
+ int read_buf_len;
- int thread_num;
+ int thread_num;
};
struct process_para
{
- //char **buf0;
- //char **buf1;
- int * io_stat0;
- int * io_stat1;
- int * read_num0;
- int * read_num1;
+ //char **buf0;
+ //char **buf1;
+ int *io_stat0;
+ int *io_stat1;
+ int *read_num0;
+ int *read_num1;
- int * finished_arr0;
- int * finished_arr1;
+ int *finished_arr0;
+ int *finished_arr1;
- preArc_array * preArcs;//change preArc** to preArc* for spin_lock version
- pthread_spinlock_t * locks; //...
+ preArc_array *preArcs; //change preArc** to preArc* for spin_lock version
+ pthread_spinlock_t *locks; //...
- int thread_id;
- int thread_num;
+ int thread_id;
+ int thread_num;
- vertex_hash2 * v_ht;
- int K_size;
- int cut_off_len;
+ vertex_hash2 *v_ht;
+ int K_size;
+ int cut_off_len;
};
-void process_1read_preArc ( preArc_array * arc_arr, pthread_spinlock_t * locks, int thread_id, vertex_hash2 * v_ht, int K_size, int cut_off_len, const char * read );
+void process_1read_preArc ( preArc_array *arc_arr, pthread_spinlock_t *locks, int thread_id, vertex_hash2 *v_ht, int K_size, int cut_off_len, const char *read );
//static methods
-static void process_edge ( vertex_hash2 * v_ht, int K_size, char * seq, int len, int type, size_t edge_id, bool bal_edge );
-static vertex2 * put_vertex ( vertex_hash2 * v_ht, kmer_t2 vertex_kmer, int & is_found );
-static void put_edge ( vertex2 * ver, kmer_t2 edge_kmer, bool is_left, int len, size_t edge_id );
-static vertex2 * search_vertex ( vertex_hash2 * v_ht, kmer_t2 * vertex_kmer );
-static void free_vertex ( vertex2 * tmp );
+static void process_edge ( vertex_hash2 *v_ht, int K_size, char *seq, int len, int type, size_t edge_id, bool bal_edge );
+static vertex2 *put_vertex ( vertex_hash2 *v_ht, kmer_t2 vertex_kmer, int &is_found );
+static void put_edge ( vertex2 *ver, kmer_t2 edge_kmer, bool is_left, int len, size_t edge_id );
+static vertex2 *search_vertex ( vertex_hash2 *v_ht, kmer_t2 *vertex_kmer );
+static void free_vertex ( vertex2 *tmp );
-static void get_kmer ( const char * seq, int len, int K_size, int pos, kmer_t2 & kmer );
-static void chop_kmers ( char * read, int len, int K_size, kmer_t2 * kmer_array, int kmer_array_len, int & kmer_num );
+static void get_kmer ( const char *seq, int len, int K_size, int pos, kmer_t2 &kmer );
+static void chop_kmers ( char *read, int len, int K_size, kmer_t2 *kmer_array, int kmer_array_len, int &kmer_num );
-static void * run_io_thread ( void * arg );
-static void * run_process_thread ( void * arg );
+static void *run_io_thread ( void *arg );
+static void *run_process_thread ( void *arg );
-static void put_preArc ( preArc_array * arc_arr, size_t left_id, size_t right_id, int added_multi );
-static void put_preArc_threaded ( preArc_array * arc_arr, pthread_spinlock_t * locks, size_t left_id, size_t right_id, int added_multi );
+static void put_preArc ( preArc_array *arc_arr, size_t left_id, size_t right_id, int added_multi );
+static void put_preArc_threaded ( preArc_array *arc_arr, pthread_spinlock_t *locks, size_t left_id, size_t right_id, int added_multi );
//add for solving repeat
struct edge_path_buffer
{
- unsigned int * mark_on_edge; //The mark on edge array, record the times of occurrence for each edge and it's revers complement
- pthread_spinlock_t * locks; //the locks for multi threads access and modification to mark_on_edge
- unsigned int ** path_buffer; //buffered the paths for out put, (the first unsigned int is the length of the path.)
- unsigned int max_path_length; // the max length for each path //set to 255 default ...
- unsigned long long buff_size; // the max path number the buffer can sotre
- unsigned long long filled_num; // the filled number of the buffer
+ unsigned int *mark_on_edge; //The mark on edge array, record the times of occurrence for each edge and it's revers complement
+ pthread_spinlock_t *locks; //the locks for multi threads access and modification to mark_on_edge
+ unsigned int **path_buffer; //buffered the paths for out put, (the first unsigned int is the length of the path.)
+ unsigned int max_path_length; // the max length for each path //set to 255 default ...
+ unsigned long long buff_size; // the max path number the buffer can sotre
+ unsigned long long filled_num; // the filled number of the buffer
};
-struct edge_path_buffer * create_edge_path_buffer ( unsigned int * mark_on_edge, pthread_spinlock_t * locks, unsigned long long buff_size, unsigned int max_path_length );
-void destory_edge_path_buffer ( struct edge_path_buffer * buffer );
-void clear_edge_path_buffer ( struct edge_path_buffer * buffer );
-void output_edge_path_buffer ( struct edge_path_buffer * buffer, FILE * path_file );
-void output_edge_path_buffer_locked ( struct edge_path_buffer * buffer, FILE * path_file, pthread_mutex_t * file_mutex );
+struct edge_path_buffer *create_edge_path_buffer ( unsigned int *mark_on_edge, pthread_spinlock_t *locks, unsigned long long buff_size, unsigned int max_path_length );
+void destory_edge_path_buffer ( struct edge_path_buffer *buffer );
+void clear_edge_path_buffer ( struct edge_path_buffer *buffer );
+void output_edge_path_buffer ( struct edge_path_buffer *buffer, FILE *path_file );
+void output_edge_path_buffer_locked ( struct edge_path_buffer *buffer, FILE *path_file, pthread_mutex_t *file_mutex );
-int put_path_2_buffer ( struct edge_path_buffer * buffer, unsigned int * path );
+int put_path_2_buffer ( struct edge_path_buffer *buffer, unsigned int *path );
-void clear_status ( struct edge_path_buffer * buffer );
-int is_full ( struct edge_path_buffer * buffer );
+void clear_status ( struct edge_path_buffer *buffer );
+int is_full ( struct edge_path_buffer *buffer );
diff --git a/sparsePregraph/inc/convert_soapdenovo.h b/sparsePregraph/inc/convert_soapdenovo.h
index 05fe016..87ddc2b 100644
--- a/sparsePregraph/inc/convert_soapdenovo.h
+++ b/sparsePregraph/inc/convert_soapdenovo.h
@@ -1,7 +1,7 @@
/*
* inc/convert_soapdenovo.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -23,9 +23,9 @@
#define _CONVERT_SOAPDENOVO_H
-void convert ( char * sparse_edge_file, int K_size, char * output_prefix );
+void convert ( char *sparse_edge_file, int K_size, char *output_prefix );
-void convert_kmer ( kmer_t2 * sparse_kmer, int K_size );
+void convert_kmer ( kmer_t2 *sparse_kmer, int K_size );
#endif
diff --git a/sparsePregraph/inc/core.h b/sparsePregraph/inc/core.h
index 46c330f..cc62299 100644
--- a/sparsePregraph/inc/core.h
+++ b/sparsePregraph/inc/core.h
@@ -17,7 +17,7 @@
#ifdef _63MER_
struct kmer_t2 //use union later
{
- uint64_t kmer[2];
+ uint64_t kmer[2];
};
#endif
@@ -25,7 +25,7 @@ struct kmer_t2 //use union later
#ifdef _127MER_
struct kmer_t2 //use union later
{
- uint64_t kmer[4];
+ uint64_t kmer[4];
};
#endif
@@ -33,57 +33,57 @@ struct kmer_t2 //use union later
struct edge_node // kmer-edge the connection between sparse-kmer
{
- uint64_t edge: 50, edge_cov: 7, len: 6, used: 1;
- struct edge_node * nxt_edge;
+ uint64_t edge: 50, edge_cov: 7, len: 6, used: 1;
+ struct edge_node *nxt_edge;
};
//#pragma pack(4) // do pack(4) later
struct kmer_info
{
- //uint8_t used:1,split_left:1,split_right:1,removed:1,flip:1,marked:1,repeat:1;
+ //uint8_t used:1,split_left:1,split_right:1,removed:1,flip:1,marked:1,repeat:1;
- uint64_t used: 1, linear: 1, deleted: 1, single: 1, inEdge: 2, twin: 2, cov1: 16; //added for soapdenovo
+ uint64_t used: 1, linear: 1, deleted: 1, single: 1, inEdge: 2, twin: 2, cov1: 16; //added for soapdenovo
- //uint16_t cov1:16;
+ //uint16_t cov1:16;
- //uint32_t edge_id;//added for soapdenovo
+ //uint32_t edge_id;//added for soapdenovo
- struct edge_node * left;
- struct edge_node * right;
+ struct edge_node *left;
+ struct edge_node *right;
};
struct kmer_info_r1
{
- uint16_t cov1: 16;
+ uint16_t cov1: 16;
};
struct bucket2 //sparse-kmer
{
- struct kmer_t2 kmer_t2;
- struct kmer_info kmer_info;
- bucket2 * nxt_bucket;
+ struct kmer_t2 kmer_t2;
+ struct kmer_info kmer_info;
+ bucket2 *nxt_bucket;
};
struct bucket2_r1 //sparse-kmer struct for round1 ,
{
- struct kmer_t2 kmer_t2;
- struct kmer_info_r1 kmer_info;
- bucket2_r1 * nxt_bucket;
+ struct kmer_t2 kmer_t2;
+ struct kmer_info_r1 kmer_info;
+ bucket2_r1 *nxt_bucket;
};
struct hashtable2
{
- struct bucket2 ** store_pos;
- size_t ht_sz;
+ struct bucket2 **store_pos;
+ size_t ht_sz;
};
struct read_t //reads bits struct ...
{
- uint64_t read_bits[100];
- int readLen;
+ uint64_t read_bits[100];
+ int readLen;
};
@@ -94,88 +94,88 @@ struct read_t //reads bits struct ...
//bool look_up_in_a_list2(struct kmer_t2 *seq,struct bucket2 *** ptr);
//void free_hashtable(hashtable2 *ht);
-inline void Init_HT2 ( struct hashtable2 * ht, size_t ht_sz )
+inline void Init_HT2 ( struct hashtable2 *ht, size_t ht_sz )
{
- ht->ht_sz = ht_sz;
- ht->store_pos = ( struct bucket2 ** ) calloc ( ht_sz, sizeof ( struct bucket2 * ) );
+ ht->ht_sz = ht_sz;
+ ht->store_pos = ( struct bucket2 ** ) calloc ( ht_sz, sizeof ( struct bucket2 * ) );
- for ( size_t i = 0; i < ht_sz; ++i )
- {
- ht->store_pos[i] = NULL;
- }
+ for ( size_t i = 0; i < ht_sz; ++i )
+ {
+ ht->store_pos[i] = NULL;
+ }
}
-inline bool look_up_in_a_list2_r1 ( struct kmer_t2 * seq, struct bucket2_r1 ** * ptr )
+inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 *** ptr )
{
- while ( ( **ptr ) != NULL )
- {
- if ( memcmp ( & ( ( **ptr )->kmer_t2.kmer ), & ( seq->kmer ), sizeof ( kmer_t2 ) ) == 0 )
- {
- break;
- }
+ while ( ( **ptr ) != NULL )
+ {
+ if ( memcmp ( & ( ( **ptr )->kmer_t2.kmer ), & ( seq->kmer ), sizeof ( kmer_t2 ) ) == 0 )
+ {
+ break;
+ }
- ( *ptr ) = & ( ( **ptr )->nxt_bucket );
- }
+ ( *ptr ) = & ( ( **ptr )->nxt_bucket );
+ }
- return ( ( **ptr ) != NULL );
+ return ( ( **ptr ) != NULL );
}
-inline bool look_up_in_a_list2 ( struct kmer_t2 * seq, struct bucket2 ** * ptr )
+inline bool look_up_in_a_list2 ( struct kmer_t2 *seq, struct bucket2 *** ptr )
{
- while ( ( **ptr ) != NULL )
- {
- if ( memcmp ( & ( ( **ptr )->kmer_t2.kmer ), & ( seq->kmer ), sizeof ( kmer_t2 ) ) == 0 )
- {
- break;
- }
+ while ( ( **ptr ) != NULL )
+ {
+ if ( memcmp ( & ( ( **ptr )->kmer_t2.kmer ), & ( seq->kmer ), sizeof ( kmer_t2 ) ) == 0 )
+ {
+ break;
+ }
- ( *ptr ) = & ( ( **ptr )->nxt_bucket );
- }
+ ( *ptr ) = & ( ( **ptr )->nxt_bucket );
+ }
- return ( ( **ptr ) != NULL );
+ return ( ( **ptr ) != NULL );
}
-inline void free_bucket ( bucket2 * tmp )
+inline void free_bucket ( bucket2 *tmp )
{
- edge_node * edge, *edge2;
- edge = tmp->kmer_info.left;
-
- while ( edge )
- {
- edge2 = edge;
- edge = edge->nxt_edge;
- free ( edge );
- }
-
- edge = tmp->kmer_info.right;
-
- while ( edge )
- {
- edge2 = edge;
- edge = edge->nxt_edge;
- free ( edge );
- }
-
- free ( tmp );
+ edge_node *edge, *edge2;
+ edge = tmp->kmer_info.left;
+
+ while ( edge )
+ {
+ edge2 = edge;
+ edge = edge->nxt_edge;
+ free ( edge );
+ }
+
+ edge = tmp->kmer_info.right;
+
+ while ( edge )
+ {
+ edge2 = edge;
+ edge = edge->nxt_edge;
+ free ( edge );
+ }
+
+ free ( tmp );
}
-inline void free_hashtable ( hashtable2 * ht )
+inline void free_hashtable ( hashtable2 *ht )
{
- bucket2 * tmp, *tmp2;
+ bucket2 *tmp, *tmp2;
- for ( size_t i = 0; i < ht->ht_sz; ++i )
- {
- tmp = ( ht->store_pos ) [i];
+ for ( size_t i = 0; i < ht->ht_sz; ++i )
+ {
+ tmp = ( ht->store_pos ) [i];
- while ( tmp )
- {
- tmp2 = tmp;
- tmp = tmp->nxt_bucket;
- free ( tmp2 );
- }
- }
+ while ( tmp )
+ {
+ tmp2 = tmp;
+ tmp = tmp->nxt_bucket;
+ free ( tmp2 );
+ }
+ }
- free ( ht->store_pos );
+ free ( ht->store_pos );
}
diff --git a/sparsePregraph/inc/faidx.h b/sparsePregraph/inc/faidx.h
index bb83210..849432a 100644
--- a/sparsePregraph/inc/faidx.h
+++ b/sparsePregraph/inc/faidx.h
@@ -43,58 +43,58 @@ typedef struct __faidx_t faidx_t;
extern "C" {
#endif
- /*!
- @abstract Build index for a FASTA or razip compressed FASTA file.
- @param fn FASTA file name
- @return 0 on success; or -1 on failure
- @discussion File "fn.fai" will be generated.
- */
- int fai_build ( const char * fn );
-
- /*!
- @abstract Distroy a faidx_t struct.
- @param fai Pointer to the struct to be destroyed
- */
- void fai_destroy ( faidx_t * fai );
-
- /*!
- @abstract Load index from "fn.fai".
- @param fn File name of the FASTA file
- */
- faidx_t * fai_load ( const char * fn );
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param reg Region in the format "chr2:20,000-30,000"
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char * fai_fetch ( const faidx_t * fai, const char * reg, int * len );
-
- /*!
- @abstract Fetch the number of sequences.
- @param fai Pointer to the faidx_t struct
- @return The number of sequences
- */
- int faidx_fetch_nseq ( const faidx_t * fai );
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param c_name Region name
- @param p_beg_i Beginning position number (zero-based)
- @param p_end_i End position number (zero-based)
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char * faidx_fetch_seq ( const faidx_t * fai, char * c_name, int p_beg_i, int p_end_i, int * len );
+/*!
+ @abstract Build index for a FASTA or razip compressed FASTA file.
+ @param fn FASTA file name
+ @return 0 on success; or -1 on failure
+ @discussion File "fn.fai" will be generated.
+ */
+int fai_build ( const char *fn );
+
+/*!
+ @abstract Distroy a faidx_t struct.
+ @param fai Pointer to the struct to be destroyed
+ */
+void fai_destroy ( faidx_t *fai );
+
+/*!
+ @abstract Load index from "fn.fai".
+ @param fn File name of the FASTA file
+ */
+faidx_t *fai_load ( const char *fn );
+
+/*!
+ @abstract Fetch the sequence in a region.
+ @param fai Pointer to the faidx_t struct
+ @param reg Region in the format "chr2:20,000-30,000"
+ @param len Length of the region
+ @return Pointer to the sequence; null on failure
+
+ @discussion The returned sequence is allocated by malloc family
+ and should be destroyed by end users by calling free() on it.
+ */
+char *fai_fetch ( const faidx_t *fai, const char *reg, int *len );
+
+/*!
+ @abstract Fetch the number of sequences.
+ @param fai Pointer to the faidx_t struct
+ @return The number of sequences
+ */
+int faidx_fetch_nseq ( const faidx_t *fai );
+
+/*!
+ @abstract Fetch the sequence in a region.
+ @param fai Pointer to the faidx_t struct
+ @param c_name Region name
+ @param p_beg_i Beginning position number (zero-based)
+ @param p_end_i End position number (zero-based)
+ @param len Length of the region
+ @return Pointer to the sequence; null on failure
+
+ @discussion The returned sequence is allocated by malloc family
+ and should be destroyed by end users by calling free() on it.
+ */
+char *faidx_fetch_seq ( const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len );
#ifdef __cplusplus
}
diff --git a/sparsePregraph/inc/glf.h b/sparsePregraph/inc/glf.h
index 24eb4d9..067fddb 100644
--- a/sparsePregraph/inc/glf.h
+++ b/sparsePregraph/inc/glf.h
@@ -3,15 +3,15 @@
typedef struct
{
- unsigned char ref_base: 4, dummy: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
- unsigned char max_mapQ; /** maximum mapping quality */
- unsigned char lk[10]; /** log likelihood ratio, capped at 255 */
- unsigned min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
+ unsigned char ref_base: 4, dummy: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
+ unsigned char max_mapQ; /** maximum mapping quality */
+ unsigned char lk[10]; /** log likelihood ratio, capped at 255 */
+ unsigned min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
} glf1_t;
#include <stdint.h>
#include "bgzf.h"
-typedef BGZF * glfFile;
+typedef BGZF *glfFile;
#define GLF3_RTYPE_END 0
#define GLF3_RTYPE_SUB 1
@@ -19,21 +19,21 @@ typedef BGZF * glfFile;
typedef struct
{
- uint8_t ref_base: 4, rtype: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
- uint8_t rms_mapQ; /** RMS mapping quality */
- uint8_t lk[10]; /** log likelihood ratio, capped at 255 */
- uint32_t min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
- int32_t offset; /** the first base in a chromosome has offset zero. */
- // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10])
- int16_t indel_len[2];
- int32_t max_len; // maximum indel len; will be modified by glf3_read1()
- char * indel_seq[2];
+ uint8_t ref_base: 4, rtype: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
+ uint8_t rms_mapQ; /** RMS mapping quality */
+ uint8_t lk[10]; /** log likelihood ratio, capped at 255 */
+ uint32_t min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
+ int32_t offset; /** the first base in a chromosome has offset zero. */
+ // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10])
+ int16_t indel_len[2];
+ int32_t max_len; // maximum indel len; will be modified by glf3_read1()
+ char *indel_seq[2];
} glf3_t;
typedef struct
{
- int32_t l_text;
- uint8_t * text;
+ int32_t l_text;
+ uint8_t *text;
} glf3_header_t;
#ifdef __cplusplus
@@ -43,14 +43,14 @@ extern "C" {
#define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t)))
#define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0)
- glf3_header_t * glf3_header_init();
- glf3_header_t * glf3_header_read ( glfFile fp );
- void glf3_header_write ( glfFile fp, const glf3_header_t * h );
- void glf3_header_destroy ( glf3_header_t * h );
- char * glf3_ref_read ( glfFile fp, int * len );
- void glf3_ref_write ( glfFile fp, const char * name, int len );
- int glf3_write1 ( glfFile fp, const glf3_t * g3 );
- int glf3_read1 ( glfFile fp, glf3_t * g3 );
+glf3_header_t *glf3_header_init();
+glf3_header_t *glf3_header_read ( glfFile fp );
+void glf3_header_write ( glfFile fp, const glf3_header_t *h );
+void glf3_header_destroy ( glf3_header_t *h );
+char *glf3_ref_read ( glfFile fp, int *len );
+void glf3_ref_write ( glfFile fp, const char *name, int len );
+int glf3_write1 ( glfFile fp, const glf3_t *g3 );
+int glf3_read1 ( glfFile fp, glf3_t *g3 );
#ifdef __cplusplus
}
diff --git a/sparsePregraph/inc/global.h b/sparsePregraph/inc/global.h
index eebaeca..0ef2462 100644
--- a/sparsePregraph/inc/global.h
+++ b/sparsePregraph/inc/global.h
@@ -1,7 +1,7 @@
/*
* inc/global.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -47,17 +47,17 @@ extern int run_mode;
extern int thrd_num_s;
-extern size_t * edge_cnt_total ; //used int lock strategy
-extern size_t * bucket_count_total ; //used in lock strategy
+extern size_t *edge_cnt_total ; //used int lock strategy
+extern size_t *bucket_count_total ; //used in lock strategy
//for io thread @see io_func.h
-extern string * seq_t;
+extern string *seq_t;
extern int io_ready;
extern int read_num;
-extern string * read_buf0;
-extern string * read_buf1;
+extern string *read_buf0;
+extern string *read_buf1;
extern int io_stat0; //must be one, if io_stat0 =0 ,the io thread will work immediately
extern int io_stat1;
@@ -67,17 +67,17 @@ extern int max_rd_len;
extern int min_rd_len;
//for the lock strategy ...
-extern pthread_spinlock_t * locks;
+extern pthread_spinlock_t *locks;
-extern unsigned int * mark_on_edge;
-extern pthread_spinlock_t * s_locks;
-extern struct edge_path_buffer ** path_buffer;
+extern unsigned int *mark_on_edge;
+extern pthread_spinlock_t *s_locks;
+extern struct edge_path_buffer **path_buffer;
extern unsigned long long buff_size;
extern unsigned int max_path_length; //max_path_length-1 is the real max path length, because the first int of buffer record the path length
-extern FILE * mark_fp ; //
-extern FILE * path_fp ; //
+extern FILE *mark_fp ; //
+extern FILE *path_fp ; //
extern pthread_mutex_t file_lock;//
extern int debug;
diff --git a/sparsePregraph/inc/io_func.h b/sparsePregraph/inc/io_func.h
index 24066fd..67d464d 100644
--- a/sparsePregraph/inc/io_func.h
+++ b/sparsePregraph/inc/io_func.h
@@ -1,7 +1,7 @@
/*
* inc/io_func.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,16 +29,16 @@
struct io_para_main
{
- int read_buf_sz;
- vector<string> *in_filenames_vt;
+ int read_buf_sz;
+ vector<string> *in_filenames_vt;
};
void sendIOWorkSignal();
-void * run_io_thread_main ( void * arg );
-void filter_N ( string & seq_s, int & bad_flag );
-void read_lib ( vector<string> &filenames, char * lib_file );
-void * open_file_robust ( const char * filetype, const char * path, const char * mode );
+void *run_io_thread_main ( void *arg );
+void filter_N ( string &seq_s, int &bad_flag );
+void read_lib ( vector<string> &filenames, char *lib_file );
+void *open_file_robust ( const char *filetype, const char *path, const char *mode );
#endif
diff --git a/sparsePregraph/inc/knetfile.h b/sparsePregraph/inc/knetfile.h
index ef70527..4b021d4 100644
--- a/sparsePregraph/inc/knetfile.h
+++ b/sparsePregraph/inc/knetfile.h
@@ -23,18 +23,18 @@
typedef struct knetFile_s
{
- int type, fd;
- int64_t offset;
- char * host, *port;
-
- // the following are for FTP only
- int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
- char * response, *retr, *size_cmd;
- int64_t seek_offset; // for lazy seek
- int64_t file_size;
-
- // the following are for HTTP only
- char * path, *http_host;
+ int type, fd;
+ int64_t offset;
+ char *host, *port;
+
+ // the following are for FTP only
+ int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
+ char *response, *retr, *size_cmd;
+ int64_t seek_offset; // for lazy seek
+ int64_t file_size;
+
+ // the following are for HTTP only
+ char *path, *http_host;
} knetFile;
#define knet_tell(fp) ((fp)->offset)
@@ -45,29 +45,29 @@ extern "C" {
#endif
#ifdef _WIN32
- int knet_win32_init();
- void knet_win32_destroy();
+int knet_win32_init();
+void knet_win32_destroy();
#endif
- knetFile * knet_open ( const char * fn, const char * mode );
-
- /*
- This only works with local files.
- */
- knetFile * knet_dopen ( int fd, const char * mode );
-
- /*
- If ->is_ready==0, this routine updates ->fd; otherwise, it simply
- reads from ->fd.
- */
- off_t knet_read ( knetFile * fp, void * buf, off_t len );
-
- /*
- This routine only sets ->offset and ->is_ready=0. It does not
- communicate with the FTP server.
- */
- off_t knet_seek ( knetFile * fp, int64_t off, int whence );
- int knet_close ( knetFile * fp );
+knetFile *knet_open ( const char *fn, const char *mode );
+
+/*
+ This only works with local files.
+ */
+knetFile *knet_dopen ( int fd, const char *mode );
+
+/*
+ If ->is_ready==0, this routine updates ->fd; otherwise, it simply
+ reads from ->fd.
+ */
+off_t knet_read ( knetFile *fp, void *buf, off_t len );
+
+/*
+ This routine only sets ->offset and ->is_ready=0. It does not
+ communicate with the FTP server.
+ */
+off_t knet_seek ( knetFile *fp, int64_t off, int whence );
+int knet_close ( knetFile *fp );
#ifdef __cplusplus
}
diff --git a/sparsePregraph/inc/kstring.h b/sparsePregraph/inc/kstring.h
index 22dbd69..392d06c 100644
--- a/sparsePregraph/inc/kstring.h
+++ b/sparsePregraph/inc/kstring.h
@@ -13,61 +13,61 @@
#define KSTRING_T kstring_t
typedef struct __kstring_t
{
- size_t l, m;
- char * s;
+ size_t l, m;
+ char *s;
} kstring_t;
#endif
-int ksprintf ( kstring_t * s, const char * fmt, ... );
-int ksplit_core ( char * s, int delimiter, int * _max, int ** _offsets );
+int ksprintf ( kstring_t *s, const char *fmt, ... );
+int ksplit_core ( char *s, int delimiter, int *_max, int **_offsets );
// calculate the auxiliary array, allocated by calloc()
-int * ksBM_prep ( const uint8_t * pat, int m );
+int *ksBM_prep ( const uint8_t *pat, int m );
/* Search pat in str and returned the list of matches. The size of the
* list is returned as n_matches. _prep is the array returned by
* ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
-int * ksBM_search ( const uint8_t * str, int n, const uint8_t * pat, int m, int * _prep, int * n_matches );
+int *ksBM_search ( const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches );
-static inline int kputsn ( const char * p, int l, kstring_t * s )
+static inline int kputsn ( const char *p, int l, kstring_t *s )
{
- if ( s->l + l + 1 >= s->m )
- {
- s->m = s->l + l + 2;
- kroundup32 ( s->m );
- s->s = ( char * ) realloc ( s->s, s->m );
- }
+ if ( s->l + l + 1 >= s->m )
+ {
+ s->m = s->l + l + 2;
+ kroundup32 ( s->m );
+ s->s = ( char * ) realloc ( s->s, s->m );
+ }
- strncpy ( s->s + s->l, p, l );
- s->l += l;
- s->s[s->l] = 0;
- return l;
+ strncpy ( s->s + s->l, p, l );
+ s->l += l;
+ s->s[s->l] = 0;
+ return l;
}
-static inline int kputs ( const char * p, kstring_t * s )
+static inline int kputs ( const char *p, kstring_t *s )
{
- return kputsn ( p, strlen ( p ), s );
+ return kputsn ( p, strlen ( p ), s );
}
-static inline int kputc ( int c, kstring_t * s )
+static inline int kputc ( int c, kstring_t *s )
{
- if ( s->l + 1 >= s->m )
- {
- s->m = s->l + 2;
- kroundup32 ( s->m );
- s->s = ( char * ) realloc ( s->s, s->m );
- }
+ if ( s->l + 1 >= s->m )
+ {
+ s->m = s->l + 2;
+ kroundup32 ( s->m );
+ s->s = ( char * ) realloc ( s->s, s->m );
+ }
- s->s[s->l++] = c;
- s->s[s->l] = 0;
- return c;
+ s->s[s->l++] = c;
+ s->s[s->l] = 0;
+ return c;
}
-static inline int * ksplit ( kstring_t * s, int delimiter, int * n )
+static inline int *ksplit ( kstring_t *s, int delimiter, int *n )
{
- int max = 0, *offsets = 0;
- *n = ksplit_core ( s->s, delimiter, &max, &offsets );
- return offsets;
+ int max = 0, *offsets = 0;
+ *n = ksplit_core ( s->s, delimiter, &max, &offsets );
+ return offsets;
}
#endif
diff --git a/sparsePregraph/inc/multi_threads.h b/sparsePregraph/inc/multi_threads.h
index 2155d91..a3794a1 100644
--- a/sparsePregraph/inc/multi_threads.h
+++ b/sparsePregraph/inc/multi_threads.h
@@ -1,7 +1,7 @@
/*
* inc/multi_threads.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,21 +26,21 @@
typedef struct parameter
{
- unsigned char threadID;
- struct hashtable2 * ht;
- struct preArc_array * preArcs; //for building preArc ...
- struct vertex_hash2 * v_ht; //for building preArc ...
- int cut_off_len;
- int K_size;
- int gap;
- unsigned char * mainSignal;
- unsigned char * selfSignal;
+ unsigned char threadID;
+ struct hashtable2 *ht;
+ struct preArc_array *preArcs; //for building preArc ...
+ struct vertex_hash2 *v_ht; //for building preArc ...
+ int cut_off_len;
+ int K_size;
+ int gap;
+ unsigned char *mainSignal;
+ unsigned char *selfSignal;
} PARAMETER;
-void creatThrds ( pthread_t * threads, PARAMETER * paras );
-void * threadRoutine ( void * para );
-void thread_wait ( pthread_t * threads );
-void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals );
+void creatThrds ( pthread_t *threads, PARAMETER *paras );
+void *threadRoutine ( void *para );
+void thread_wait ( pthread_t *threads );
+void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals );
#endif
diff --git a/sparsePregraph/inc/razf.h b/sparsePregraph/inc/razf.h
index 425257e..dcbdf9e 100644
--- a/sparsePregraph/inc/razf.h
+++ b/sparsePregraph/inc/razf.h
@@ -66,10 +66,10 @@ typedef struct _gz_header_s _gz_header;
typedef struct
{
- uint32_t * cell_offsets; // i
- int64_t * bin_offsets; // i / BIN_SIZE
- int size;
- int cap;
+ uint32_t *cell_offsets; // i
+ int64_t *bin_offsets; // i / BIN_SIZE
+ int size;
+ int cap;
} ZBlockIndex;
/* When storing index, output bytes in Big-Endian everywhere */
@@ -79,56 +79,56 @@ typedef struct
typedef struct RandomAccessZFile
{
- char mode; /* 'w' : write mode; 'r' : read mode */
- int file_type;
- /* plain file or rz file, razf_read support plain file as input too, in this case, razf_read work as buffered fread */
+ char mode; /* 'w' : write mode; 'r' : read mode */
+ int file_type;
+ /* plain file or rz file, razf_read support plain file as input too, in this case, razf_read work as buffered fread */
#ifdef _USE_KNETFILE
- union
- {
- knetFile * fpr;
- int fpw;
- } x;
+ union
+ {
+ knetFile *fpr;
+ int fpw;
+ } x;
#else
- int filedes; /* the file descriptor */
+ int filedes; /* the file descriptor */
#endif
- z_stream * stream;
- ZBlockIndex * index;
- int64_t in, out, end, src_end;
- /* in: n bytes total in; out: n bytes total out; */
- /* end: the end of all data blocks, while the start of index; src_end: the true end position in uncompressed file */
- int buf_flush; // buffer should be flush, suspend inflate util buffer is empty
- int64_t block_pos, block_off, next_block_pos;
- /* block_pos: the start postiion of current block in compressed file */
- /* block_off: tell how many bytes have been read from current block */
- void * inbuf, *outbuf;
- int header_size;
- gz_header * header;
- /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */
- int buf_off, buf_len;
- int z_err, z_eof;
- int seekable;
- /* Indice where the source is seekable */
- int load_index;
- /* set has_index to 0 in mode 'w', then index will be discarded */
+ z_stream *stream;
+ ZBlockIndex *index;
+ int64_t in, out, end, src_end;
+ /* in: n bytes total in; out: n bytes total out; */
+ /* end: the end of all data blocks, while the start of index; src_end: the true end position in uncompressed file */
+ int buf_flush; // buffer should be flush, suspend inflate util buffer is empty
+ int64_t block_pos, block_off, next_block_pos;
+ /* block_pos: the start postiion of current block in compressed file */
+ /* block_off: tell how many bytes have been read from current block */
+ void *inbuf, *outbuf;
+ int header_size;
+ gz_header *header;
+ /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */
+ int buf_off, buf_len;
+ int z_err, z_eof;
+ int seekable;
+ /* Indice where the source is seekable */
+ int load_index;
+ /* set has_index to 0 in mode 'w', then index will be discarded */
} RAZF;
#ifdef __cplusplus
extern "C" {
#endif
- RAZF * razf_dopen ( int data_fd, const char * mode );
- RAZF * razf_open ( const char * fn, const char * mode );
- int razf_write ( RAZF * rz, const void * data, int size );
- int razf_read ( RAZF * rz, void * data, int size );
- int64_t razf_seek ( RAZF * rz, int64_t pos, int where );
- void razf_close ( RAZF * rz );
+RAZF *razf_dopen ( int data_fd, const char *mode );
+RAZF *razf_open ( const char *fn, const char *mode );
+int razf_write ( RAZF *rz, const void *data, int size );
+int razf_read ( RAZF *rz, void *data, int size );
+int64_t razf_seek ( RAZF *rz, int64_t pos, int where );
+void razf_close ( RAZF *rz );
#define razf_tell(rz) ((rz)->out)
- RAZF * razf_open2 ( const char * filename, const char * mode );
- RAZF * razf_dopen2 ( int fd, const char * mode );
- uint64_t razf_tell2 ( RAZF * rz );
- int64_t razf_seek2 ( RAZF * rz, uint64_t voffset, int where );
+RAZF *razf_open2 ( const char *filename, const char *mode );
+RAZF *razf_dopen2 ( int fd, const char *mode );
+uint64_t razf_tell2 ( RAZF *rz );
+int64_t razf_seek2 ( RAZF *rz, uint64_t voffset, int where );
#ifdef __cplusplus
}
diff --git a/sparsePregraph/inc/sam_header.h b/sparsePregraph/inc/sam_header.h
index c1d3ad9..f8a1e50 100644
--- a/sparsePregraph/inc/sam_header.h
+++ b/sparsePregraph/inc/sam_header.h
@@ -5,17 +5,17 @@
extern "C" {
#endif
- void * sam_header_parse2 ( const char * headerText );
- void * sam_header_merge ( int n, const void ** dicts );
- void sam_header_free ( void * header );
- char * sam_header_write ( const void * headerDict ); // returns a newly allocated string
+void *sam_header_parse2 ( const char *headerText );
+void *sam_header_merge ( int n, const void **dicts );
+void sam_header_free ( void *header );
+char *sam_header_write ( const void *headerDict ); // returns a newly allocated string
- char ** sam_header2list ( const void * _dict, char type[2], char key_tag[2], int * _n );
+char **sam_header2list ( const void *_dict, char type[2], char key_tag[2], int *_n );
- void * sam_header2tbl ( const void * dict, char type[2], char key_tag[2], char value_tag[2] );
- const char * sam_tbl_get ( void * h, const char * key );
- int sam_tbl_size ( void * h );
- void sam_tbl_destroy ( void * h );
+void *sam_header2tbl ( const void *dict, char type[2], char key_tag[2], char value_tag[2] );
+const char *sam_tbl_get ( void *h, const char *key );
+int sam_tbl_size ( void *h );
+void sam_tbl_destroy ( void *h );
#ifdef __cplusplus
}
diff --git a/sparsePregraph/inc/sam_view.h b/sparsePregraph/inc/sam_view.h
index 0663765..33d826f 100644
--- a/sparsePregraph/inc/sam_view.h
+++ b/sparsePregraph/inc/sam_view.h
@@ -3,53 +3,64 @@
static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
-static char * g_library, *g_rg;
+static char *g_library, *g_rg;
static int g_sol2sanger_tbl[128];
-static void sol2sanger ( bam1_t * b )
+static void sol2sanger ( bam1_t *b )
{
- int l;
- uint8_t * qual = bam1_qual ( b );
+ int l;
+ uint8_t *qual = bam1_qual ( b );
- if ( g_sol2sanger_tbl[30] == 0 )
- {
- for ( l = 0; l != 128; ++l )
- {
- g_sol2sanger_tbl[l] = ( int ) ( 10.0 * log ( 1.0 + pow ( 10.0, ( l - 64 + 33 ) / 10.0 ) ) / log ( 10.0 ) + .499 );
+ if ( g_sol2sanger_tbl[30] == 0 )
+ {
+ for ( l = 0; l != 128; ++l )
+ {
+ g_sol2sanger_tbl[l] = ( int ) ( 10.0 * log ( 1.0 + pow ( 10.0, ( l - 64 + 33 ) / 10.0 ) ) / log ( 10.0 ) + .499 );
- if ( g_sol2sanger_tbl[l] >= 93 ) { g_sol2sanger_tbl[l] = 93; }
- }
- }
+ if ( g_sol2sanger_tbl[l] >= 93 )
+ {
+ g_sol2sanger_tbl[l] = 93;
+ }
+ }
+ }
- for ( l = 0; l < b->core.l_qseq; ++l )
- {
- int q = qual[l];
+ for ( l = 0; l < b->core.l_qseq; ++l )
+ {
+ int q = qual[l];
- if ( q > 127 ) { q = 127; }
+ if ( q > 127 )
+ {
+ q = 127;
+ }
- qual[l] = g_sol2sanger_tbl[q];
- }
+ qual[l] = g_sol2sanger_tbl[q];
+ }
}
-static inline int __g_skip_aln ( const bam_header_t * h, const bam1_t * b )
+static inline int __g_skip_aln ( const bam_header_t *h, const bam1_t *b )
{
- if ( b->core.qual < g_min_mapQ || ( ( b->core.flag & g_flag_on ) != g_flag_on ) || ( b->core.flag & g_flag_off ) )
- { return 1; }
+ if ( b->core.qual < g_min_mapQ || ( ( b->core.flag & g_flag_on ) != g_flag_on ) || ( b->core.flag & g_flag_off ) )
+ {
+ return 1;
+ }
- if ( g_rg )
- {
- uint8_t * s = bam_aux_get ( b, "RG" );
+ if ( g_rg )
+ {
+ uint8_t *s = bam_aux_get ( b, "RG" );
- if ( s && strcmp ( g_rg, ( char * ) ( s + 1 ) ) == 0 ) { return 0; }
- }
+ if ( s && strcmp ( g_rg, ( char * ) ( s + 1 ) ) == 0 )
+ {
+ return 0;
+ }
+ }
- if ( g_library )
- {
- const char * p = bam_get_library ( ( bam_header_t * ) h, b );
- return ( p && strcmp ( p, g_library ) == 0 ) ? 0 : 1;
- }
+ if ( g_library )
+ {
+ const char *p = bam_get_library ( ( bam_header_t * ) h, b );
+ return ( p && strcmp ( p, g_library ) == 0 ) ? 0 : 1;
+ }
- return 0;
+ return 0;
}
diff --git a/sparsePregraph/inc/seq_util.h b/sparsePregraph/inc/seq_util.h
index 6509763..e9a1d68 100644
--- a/sparsePregraph/inc/seq_util.h
+++ b/sparsePregraph/inc/seq_util.h
@@ -15,486 +15,539 @@
#include "core.h"
-extern inline void Init_Read ( string & seq, struct read_t & read );
+extern inline void Init_Read ( string &seq, struct read_t &read );
-extern inline uint64_t * str2bitsarr ( const char * c_str, int len, uint64_t * b_str, int arr_sz );
+extern inline uint64_t *str2bitsarr ( const char *c_str, int len, uint64_t *b_str, int arr_sz );
-extern inline char * bitsarr2str ( uint64_t * b_seq, int len, char * c_str, int arr_sz );
+extern inline char *bitsarr2str ( uint64_t *b_seq, int len, char *c_str, int arr_sz );
-extern inline void get_sub_arr ( uint64_t * bitsarr_in, int bitsarr_len, int begin_pos, int sub_sz, uint64_t * bitsarr_out );
+extern inline void get_sub_arr ( uint64_t *bitsarr_in, int bitsarr_len, int begin_pos, int sub_sz, uint64_t *bitsarr_out );
-extern inline void L_shift_NC ( uint64_t * bitsarr, int shift_sz, int arr_sz );
+extern inline void L_shift_NC ( uint64_t *bitsarr, int shift_sz, int arr_sz );
-extern inline void R_shift_NC ( uint64_t * bitsarr, int shift_sz, int arr_sz );
+extern inline void R_shift_NC ( uint64_t *bitsarr, int shift_sz, int arr_sz );
-extern inline int uint64_t_cmp ( uint64_t * A, uint64_t * B, int Kmer_arr_sz );
+extern inline int uint64_t_cmp ( uint64_t *A, uint64_t *B, int Kmer_arr_sz );
-extern inline uint64_t * get_rev_comp_seq_arr ( uint64_t * seq_arr, int seq_size, int arr_sz );
+extern inline uint64_t *get_rev_comp_seq_arr ( uint64_t *seq_arr, int seq_size, int arr_sz );
extern inline uint64_t get_rev_comp_seq ( uint64_t seq, int seq_size );
-extern inline uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed );
+extern inline uint64_t MurmurHash64A ( const void *key, int len, unsigned int seed );
-inline void Init_Read ( string & seq, struct read_t & read )
+inline void Init_Read ( string &seq, struct read_t &read )
{
- read.readLen = ( int ) seq.size();
- int Read_arr_sz = read.readLen / 32 + 1;
- int rem = read.readLen % 32;
+ read.readLen = ( int ) seq.size();
+ int Read_arr_sz = read.readLen / 32 + 1;
+ int rem = read.readLen % 32;
- if ( rem == 0 )
- {Read_arr_sz--;}
+ if ( rem == 0 )
+ {
+ Read_arr_sz--;
+ }
- str2bitsarr ( seq.c_str(), ( int ) seq.size(), read.read_bits, Read_arr_sz );
+ str2bitsarr ( seq.c_str(), ( int ) seq.size(), read.read_bits, Read_arr_sz );
}
-inline uint64_t * str2bitsarr ( const char * c_str, int len, uint64_t * b_str, int arr_sz )
+inline uint64_t *str2bitsarr ( const char *c_str, int len, uint64_t *b_str, int arr_sz )
{
- for ( int k = 0; k < arr_sz; ++k )
- {
- b_str[k] = 0;
- }
-
- int arr_sz_needed = len / 32 + 1;
- int rem = len % 32;
-
- if ( rem == 0 )
- {arr_sz_needed--;}
-
- int beg_arr_idx = arr_sz - arr_sz_needed;
-
- if ( rem == 0 && arr_sz_needed > 0 )
- {rem = 32;}
-
- for ( int k = 0; k < len; k++ )
- {
- if ( rem == 0 )
- {beg_arr_idx++; rem = 32;}
-
- switch ( c_str[k] )
- {
- case ( 'A' ) :
- case ( 'a' ) :
- case ( '0' ) :
- b_str[beg_arr_idx] <<= 2;
- rem--;
- break;
- case ( 'C' ) :
- case ( 'c' ) :
- case ( '1' ) :
- b_str[beg_arr_idx] <<= 2;
- ++b_str[beg_arr_idx];
- rem--;
- break;
- case 'G':
- case 'g':
- case '2':
- b_str[beg_arr_idx] <<= 1;
- ++b_str[beg_arr_idx];
- b_str[beg_arr_idx] <<= 1;
- rem--;
- break;
- case 'T':
- case 't':
- case '3':
- b_str[beg_arr_idx] <<= 1;
- ++b_str[beg_arr_idx];
- b_str[beg_arr_idx] <<= 1;
- ++b_str[beg_arr_idx];
- rem--;
- break;
- default:
- return b_str;
- }
- }
-
- return b_str;
+ for ( int k = 0; k < arr_sz; ++k )
+ {
+ b_str[k] = 0;
+ }
+
+ int arr_sz_needed = len / 32 + 1;
+ int rem = len % 32;
+
+ if ( rem == 0 )
+ {
+ arr_sz_needed--;
+ }
+
+ int beg_arr_idx = arr_sz - arr_sz_needed;
+
+ if ( rem == 0 && arr_sz_needed > 0 )
+ {
+ rem = 32;
+ }
+
+ for ( int k = 0; k < len; k++ )
+ {
+ if ( rem == 0 )
+ {
+ beg_arr_idx++;
+ rem = 32;
+ }
+
+ switch ( c_str[k] )
+ {
+ case ( 'A' ) :
+ case ( 'a' ) :
+ case ( '0' ) :
+ b_str[beg_arr_idx] <<= 2;
+ rem--;
+ break;
+
+ case ( 'C' ) :
+ case ( 'c' ) :
+ case ( '1' ) :
+ b_str[beg_arr_idx] <<= 2;
+ ++b_str[beg_arr_idx];
+ rem--;
+ break;
+
+ case 'G':
+ case 'g':
+ case '2':
+ b_str[beg_arr_idx] <<= 1;
+ ++b_str[beg_arr_idx];
+ b_str[beg_arr_idx] <<= 1;
+ rem--;
+ break;
+
+ case 'T':
+ case 't':
+ case '3':
+ b_str[beg_arr_idx] <<= 1;
+ ++b_str[beg_arr_idx];
+ b_str[beg_arr_idx] <<= 1;
+ ++b_str[beg_arr_idx];
+ rem--;
+ break;
+
+ default:
+ return b_str;
+ }
+ }
+
+ return b_str;
}
-inline char * bitsarr2str ( uint64_t * b_seq, int len, char * c_str, int arr_sz )
+inline char *bitsarr2str ( uint64_t *b_seq, int len, char *c_str, int arr_sz )
{
- int tot_bits = arr_sz * 64;
-
- for ( int i = 0; i < len; ++i )
- {
- uint64_t temp, temp2[100];
-
- for ( int k = 0; k < arr_sz; ++k )
- {
- temp2[k] = b_seq[k];
- }
-
- L_shift_NC ( temp2, tot_bits - ( len - i ) * 2, arr_sz );
- R_shift_NC ( temp2, tot_bits - 2, arr_sz );
- temp = temp2[arr_sz - 1];
-
- switch ( temp )
- {
- case 0:
- c_str[i] = 'A';
- break;
- case 1:
- c_str[i] = 'C';
- break;
- case 2:
- c_str[i] = 'G';
- break;
- case 3:
- c_str[i] = 'T';
- break;
- }
- }
-
- c_str[len] = '\0';
- return c_str;
+ int tot_bits = arr_sz * 64;
+
+ for ( int i = 0; i < len; ++i )
+ {
+ uint64_t temp, temp2[100];
+
+ for ( int k = 0; k < arr_sz; ++k )
+ {
+ temp2[k] = b_seq[k];
+ }
+
+ L_shift_NC ( temp2, tot_bits - ( len - i ) * 2, arr_sz );
+ R_shift_NC ( temp2, tot_bits - 2, arr_sz );
+ temp = temp2[arr_sz - 1];
+
+ switch ( temp )
+ {
+ case 0:
+ c_str[i] = 'A';
+ break;
+
+ case 1:
+ c_str[i] = 'C';
+ break;
+
+ case 2:
+ c_str[i] = 'G';
+ break;
+
+ case 3:
+ c_str[i] = 'T';
+ break;
+ }
+ }
+
+ c_str[len] = '\0';
+ return c_str;
}
-inline void get_sub_arr ( uint64_t * bitsarr_in, int bitsarr_len, int begin_pos, int sub_sz, uint64_t * bitsarr_out )
+inline void get_sub_arr ( uint64_t *bitsarr_in, int bitsarr_len, int begin_pos, int sub_sz, uint64_t *bitsarr_out )
{
- if ( bitsarr_len < sub_sz )
- {cout << "Error! Input kmer too short." << bitsarr_len << " " << sub_sz << endl; return;}
-
- int arr_sz_in = bitsarr_len / 32 + 1;
- int rem = bitsarr_len % 32;
-
- if ( rem == 0 )
- {arr_sz_in--;}
-
- int arr_sz_out = sub_sz / 32 + 1;
-
- if ( sub_sz % 32 == 0 )
- {arr_sz_out--;}
-
- uint64_t temp_arr[10];
- memset ( temp_arr, 0, sizeof ( temp_arr ) );
- memset ( bitsarr_out, 0, sizeof ( uint64_t ) *arr_sz_out );
- int rem2 = ( 32 - rem + begin_pos ) % 32;
- int block_beg = ( 32 - rem + begin_pos ) / 32;
-
- if ( rem == 0 )
- {block_beg--;}
-
- int rem3 = ( 32 - rem + begin_pos + sub_sz ) % 32;
- int block_end = ( 32 - rem + begin_pos + sub_sz ) / 32;
-
- if ( rem3 != 0 )
- {rem3 = 32 - rem3;}
- else
- {
- block_end--;
- }
-
- if ( rem == 0 )
- {block_end--;}
-
- int orig_sz = ( block_end - block_beg + 1 );
- memcpy ( temp_arr, &bitsarr_in[block_beg], orig_sz * sizeof ( uint64_t ) );
- L_shift_NC ( temp_arr, rem2 * 2, orig_sz );
- R_shift_NC ( temp_arr, ( rem2 + rem3 ) % 32 * 2, arr_sz_out );
- memcpy ( bitsarr_out, temp_arr, arr_sz_out * sizeof ( uint64_t ) );
+ if ( bitsarr_len < sub_sz )
+ {
+ cout << "Error! Input kmer too short." << bitsarr_len << " " << sub_sz << endl;
+ return;
+ }
+
+ int arr_sz_in = bitsarr_len / 32 + 1;
+ int rem = bitsarr_len % 32;
+
+ if ( rem == 0 )
+ {
+ arr_sz_in--;
+ }
+
+ int arr_sz_out = sub_sz / 32 + 1;
+
+ if ( sub_sz % 32 == 0 )
+ {
+ arr_sz_out--;
+ }
+
+ uint64_t temp_arr[10];
+ memset ( temp_arr, 0, sizeof ( temp_arr ) );
+ memset ( bitsarr_out, 0, sizeof ( uint64_t ) *arr_sz_out );
+ int rem2 = ( 32 - rem + begin_pos ) % 32;
+ int block_beg = ( 32 - rem + begin_pos ) / 32;
+
+ if ( rem == 0 )
+ {
+ block_beg--;
+ }
+
+ int rem3 = ( 32 - rem + begin_pos + sub_sz ) % 32;
+ int block_end = ( 32 - rem + begin_pos + sub_sz ) / 32;
+
+ if ( rem3 != 0 )
+ {
+ rem3 = 32 - rem3;
+ }
+ else
+ {
+ block_end--;
+ }
+
+ if ( rem == 0 )
+ {
+ block_end--;
+ }
+
+ int orig_sz = ( block_end - block_beg + 1 );
+ memcpy ( temp_arr, &bitsarr_in[block_beg], orig_sz * sizeof ( uint64_t ) );
+ L_shift_NC ( temp_arr, rem2 * 2, orig_sz );
+ R_shift_NC ( temp_arr, ( rem2 + rem3 ) % 32 * 2, arr_sz_out );
+ memcpy ( bitsarr_out, temp_arr, arr_sz_out * sizeof ( uint64_t ) );
}
-inline void L_shift_NC ( uint64_t * bitsarr, int shift_sz, int arr_sz )
+inline void L_shift_NC ( uint64_t *bitsarr, int shift_sz, int arr_sz )
{
- uint64_t temp_arr[100];
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- temp_arr[i] = 0;
- }
-
- int jmp = shift_sz / 64;
- int offset = shift_sz % 64;
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- if ( i + jmp + 1 < arr_sz )
- {
- uint64_t tt = 0;
-
- if ( offset == 0 )
- {
- tt = 0;
- }
- else
- {
- tt = ( bitsarr[i + jmp + 1] >> ( 64 - offset ) );
- }
-
- temp_arr[i] = ( ( bitsarr[i + jmp] << offset ) | tt );
- }
-
- if ( i + jmp + 1 == arr_sz )
- {temp_arr[i] = bitsarr[i + jmp] << offset;}
-
- if ( i + jmp + 1 > arr_sz )
- {temp_arr[i] = 0;}
- }
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- bitsarr[i] = temp_arr[i];
- }
+ uint64_t temp_arr[100];
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ temp_arr[i] = 0;
+ }
+
+ int jmp = shift_sz / 64;
+ int offset = shift_sz % 64;
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ if ( i + jmp + 1 < arr_sz )
+ {
+ uint64_t tt = 0;
+
+ if ( offset == 0 )
+ {
+ tt = 0;
+ }
+ else
+ {
+ tt = ( bitsarr[i + jmp + 1] >> ( 64 - offset ) );
+ }
+
+ temp_arr[i] = ( ( bitsarr[i + jmp] << offset ) | tt );
+ }
+
+ if ( i + jmp + 1 == arr_sz )
+ {
+ temp_arr[i] = bitsarr[i + jmp] << offset;
+ }
+
+ if ( i + jmp + 1 > arr_sz )
+ {
+ temp_arr[i] = 0;
+ }
+ }
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ bitsarr[i] = temp_arr[i];
+ }
}
-inline void R_shift_NC ( uint64_t * bitsarr, int shift_sz, int arr_sz )
+inline void R_shift_NC ( uint64_t *bitsarr, int shift_sz, int arr_sz )
{
- uint64_t temp_arr[100];
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- temp_arr[i] = 0;
- }
-
- int jmp = shift_sz / 64;
- int offset = shift_sz % 64;
-
- if ( offset == 0 ) //to fix the move 64bit bug
- {
- for ( int i = arr_sz - 1; i >= 0; --i )
- {
- if ( i - jmp > 0 )
- {temp_arr[i] = bitsarr[i - jmp];}
-
- if ( i - jmp == 0 )
- {temp_arr[i] = bitsarr[i - jmp];}
-
- if ( i - jmp < 0 )
- {temp_arr[i] = 0;}
- }
- }
- else
- {
- for ( int i = arr_sz - 1; i >= 0; --i )
- {
- if ( i - jmp > 0 )
- {temp_arr[i] = ( bitsarr[i - jmp] >> offset ) | ( bitsarr[i - jmp - 1] << ( 64 - offset ) );}
-
- if ( i - jmp == 0 )
- {temp_arr[i] = ( bitsarr[i - jmp] >> offset );}
-
- if ( i - jmp < 0 )
- {temp_arr[i] = 0;}
- }
- }
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- bitsarr[i] = temp_arr[i];
- }
+ uint64_t temp_arr[100];
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ temp_arr[i] = 0;
+ }
+
+ int jmp = shift_sz / 64;
+ int offset = shift_sz % 64;
+
+ if ( offset == 0 ) //to fix the move 64bit bug
+ {
+ for ( int i = arr_sz - 1; i >= 0; --i )
+ {
+ if ( i - jmp > 0 )
+ {
+ temp_arr[i] = bitsarr[i - jmp];
+ }
+
+ if ( i - jmp == 0 )
+ {
+ temp_arr[i] = bitsarr[i - jmp];
+ }
+
+ if ( i - jmp < 0 )
+ {
+ temp_arr[i] = 0;
+ }
+ }
+ }
+ else
+ {
+ for ( int i = arr_sz - 1; i >= 0; --i )
+ {
+ if ( i - jmp > 0 )
+ {
+ temp_arr[i] = ( bitsarr[i - jmp] >> offset ) | ( bitsarr[i - jmp - 1] << ( 64 - offset ) );
+ }
+
+ if ( i - jmp == 0 )
+ {
+ temp_arr[i] = ( bitsarr[i - jmp] >> offset );
+ }
+
+ if ( i - jmp < 0 )
+ {
+ temp_arr[i] = 0;
+ }
+ }
+ }
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ bitsarr[i] = temp_arr[i];
+ }
}
-inline int uint64_t_cmp ( uint64_t * A, uint64_t * B, int Kmer_arr_sz )
+inline int uint64_t_cmp ( uint64_t *A, uint64_t *B, int Kmer_arr_sz )
{
- int flag = 0;
-
- for ( int jj = 0; jj < Kmer_arr_sz; ++jj )
- {
- if ( A[jj] > B[jj] )
- {
- flag = 1;
- break;
- }
-
- if ( A[jj] < B[jj] )
- {
- flag = -1;
- break;
- }
-
- if ( A[jj] == B[jj] )
- {
- continue;
- }
- }
-
- return flag;
+ int flag = 0;
+
+ for ( int jj = 0; jj < Kmer_arr_sz; ++jj )
+ {
+ if ( A[jj] > B[jj] )
+ {
+ flag = 1;
+ break;
+ }
+
+ if ( A[jj] < B[jj] )
+ {
+ flag = -1;
+ break;
+ }
+
+ if ( A[jj] == B[jj] )
+ {
+ continue;
+ }
+ }
+
+ return flag;
}
//for 63mer
-inline uint64_t * get_rev_comp_seq_arr ( uint64_t * seq_arr, int seq_size, int arr_sz )
+inline uint64_t *get_rev_comp_seq_arr ( uint64_t *seq_arr, int seq_size, int arr_sz )
{
- if ( seq_size < 32 && arr_sz == 2 )
- {
- seq_arr[1] = get_rev_comp_seq ( seq_arr[1], seq_size );
-
- if ( seq_arr[0] != 0 )
- {
- fprintf ( stderr, "ERROR: in get_rev_comp_seq_arr \n" );
- exit ( -1 );
- }
-
- return seq_arr;
- }
-
- int tot_bits = arr_sz * 64;
-
- for ( int i = 0; i < arr_sz; ++i )
- {
- seq_arr[i] = ~seq_arr[i];
- seq_arr[i] = ( ( seq_arr[i] & 0x3333333333333333 ) << 2 ) | ( ( seq_arr[i] & 0xCCCCCCCCCCCCCCCC ) >> 2 );
- seq_arr[i] = ( ( seq_arr[i] & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq_arr[i] & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
- seq_arr[i] = ( ( seq_arr[i] & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq_arr[i] & 0xFF00FF00FF00FF00 ) >> 8 );
- seq_arr[i] = ( ( seq_arr[i] & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq_arr[i] & 0xFFFF0000FFFF0000 ) >> 16 );
- seq_arr[i] = ( ( seq_arr[i] & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq_arr[i] & 0xFFFFFFFF00000000 ) >> 32 );
- }
-
- int j = 0, k = arr_sz - 1;
-
- for ( ; j < k; ++j, --k )
- {
- uint64_t temp;
- temp = seq_arr[j];
- seq_arr[j] = seq_arr[k];
- seq_arr[k] = temp;
- }
-
- R_shift_NC ( seq_arr, tot_bits - ( seq_size * 2 ), arr_sz );
- return seq_arr;
+ if ( seq_size < 32 && arr_sz == 2 )
+ {
+ seq_arr[1] = get_rev_comp_seq ( seq_arr[1], seq_size );
+
+ if ( seq_arr[0] != 0 )
+ {
+ fprintf ( stderr, "ERROR: in get_rev_comp_seq_arr \n" );
+ exit ( -1 );
+ }
+
+ return seq_arr;
+ }
+
+ int tot_bits = arr_sz * 64;
+
+ for ( int i = 0; i < arr_sz; ++i )
+ {
+ seq_arr[i] = ~seq_arr[i];
+ seq_arr[i] = ( ( seq_arr[i] & 0x3333333333333333 ) << 2 ) | ( ( seq_arr[i] & 0xCCCCCCCCCCCCCCCC ) >> 2 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq_arr[i] & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq_arr[i] & 0xFF00FF00FF00FF00 ) >> 8 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq_arr[i] & 0xFFFF0000FFFF0000 ) >> 16 );
+ seq_arr[i] = ( ( seq_arr[i] & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq_arr[i] & 0xFFFFFFFF00000000 ) >> 32 );
+ }
+
+ int j = 0, k = arr_sz - 1;
+
+ for ( ; j < k; ++j, --k )
+ {
+ uint64_t temp;
+ temp = seq_arr[j];
+ seq_arr[j] = seq_arr[k];
+ seq_arr[k] = temp;
+ }
+
+ R_shift_NC ( seq_arr, tot_bits - ( seq_size * 2 ), arr_sz );
+ return seq_arr;
}
inline uint64_t get_rev_comp_seq ( uint64_t seq, int seq_size )
{
- seq = ~seq;
- seq = ( ( seq & 0x3333333333333333 ) << 2 ) | ( ( seq & 0xCCCCCCCCCCCCCCCC ) >> 2 );
- seq = ( ( seq & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
- seq = ( ( seq & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq & 0xFF00FF00FF00FF00 ) >> 8 );
- seq = ( ( seq & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq & 0xFFFF0000FFFF0000 ) >> 16 );
- seq = ( ( seq & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq & 0xFFFFFFFF00000000 ) >> 32 );
- return seq >> ( 64 - ( seq_size * 2 ) );
+ seq = ~seq;
+ seq = ( ( seq & 0x3333333333333333 ) << 2 ) | ( ( seq & 0xCCCCCCCCCCCCCCCC ) >> 2 );
+ seq = ( ( seq & 0x0F0F0F0F0F0F0F0F ) << 4 ) | ( ( seq & 0xF0F0F0F0F0F0F0F0 ) >> 4 );
+ seq = ( ( seq & 0x00FF00FF00FF00FF ) << 8 ) | ( ( seq & 0xFF00FF00FF00FF00 ) >> 8 );
+ seq = ( ( seq & 0x0000FFFF0000FFFF ) << 16 ) | ( ( seq & 0xFFFF0000FFFF0000 ) >> 16 );
+ seq = ( ( seq & 0x00000000FFFFFFFF ) << 32 ) | ( ( seq & 0xFFFFFFFF00000000 ) >> 32 );
+ return seq >> ( 64 - ( seq_size * 2 ) );
}
//for 64bit platform
-inline uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
+inline uint64_t MurmurHash64A ( const void *key, int len, unsigned int seed )
{
- const uint64_t m = 0xc6a4a7935bd1e995;
- const int r = 47;
- uint64_t h = seed ^ ( len * m );
- const uint64_t * data = ( const uint64_t * ) key;
- const uint64_t * end = data + ( len / 8 );
-
- while ( data != end )
- {
- uint64_t k = *data++;
- k *= m;
- k ^= k >> r;
- k *= m;
- h ^= k;
- h *= m;
- }
-
- const unsigned char * data2 = ( const unsigned char * ) data;
-
- switch ( len & 7 )
- {
- case 7:
- h ^= uint64_t ( data2[6] ) << 48;
- case 6:
- h ^= uint64_t ( data2[5] ) << 40;
- case 5:
- h ^= uint64_t ( data2[4] ) << 32;
- case 4:
- h ^= uint64_t ( data2[3] ) << 24;
- case 3:
- h ^= uint64_t ( data2[2] ) << 16;
- case 2:
- h ^= uint64_t ( data2[1] ) << 8;
- case 1:
- h ^= uint64_t ( data2[0] );
- h *= m;
- };
-
- h ^= h >> r;
-
- h *= m;
-
- h ^= h >> r;
-
- return h;
+ const uint64_t m = 0xc6a4a7935bd1e995;
+ const int r = 47;
+ uint64_t h = seed ^ ( len * m );
+ const uint64_t *data = ( const uint64_t * ) key;
+ const uint64_t *end = data + ( len / 8 );
+
+ while ( data != end )
+ {
+ uint64_t k = *data++;
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+ h ^= k;
+ h *= m;
+ }
+
+ const unsigned char *data2 = ( const unsigned char * ) data;
+
+ switch ( len & 7 )
+ {
+ case 7:
+ h ^= uint64_t ( data2[6] ) << 48;
+
+ case 6:
+ h ^= uint64_t ( data2[5] ) << 40;
+
+ case 5:
+ h ^= uint64_t ( data2[4] ) << 32;
+
+ case 4:
+ h ^= uint64_t ( data2[3] ) << 24;
+
+ case 3:
+ h ^= uint64_t ( data2[2] ) << 16;
+
+ case 2:
+ h ^= uint64_t ( data2[1] ) << 8;
+
+ case 1:
+ h ^= uint64_t ( data2[0] );
+ h *= m;
+ };
+
+ h ^= h >> r;
+
+ h *= m;
+
+ h ^= h >> r;
+
+ return h;
}
//for 32bit platform
-inline uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed )
+inline uint64_t MurmurHash64B ( const void *key, int len, unsigned int seed )
{
- const unsigned int m = 0x5bd1e995;
- const int r = 24;
- unsigned int h1 = seed ^ len;
- unsigned int h2 = 0;
- const unsigned int * data = ( const unsigned int * ) key;
-
- while ( len >= 8 )
- {
- unsigned int k1 = *data++;
- k1 *= m;
- k1 ^= k1 >> r;
- k1 *= m;
- h1 *= m;
- h1 ^= k1;
- len -= 4;
- unsigned int k2 = *data++;
- k2 *= m;
- k2 ^= k2 >> r;
- k2 *= m;
- h2 *= m;
- h2 ^= k2;
- len -= 4;
- }
-
- if ( len >= 4 )
- {
- unsigned int k1 = *data++;
- k1 *= m;
- k1 ^= k1 >> r;
- k1 *= m;
- h1 *= m;
- h1 ^= k1;
- len -= 4;
- }
-
- switch ( len )
- {
- case 3:
- h2 ^= ( ( unsigned char * ) data ) [2] << 16;
- case 2:
- h2 ^= ( ( unsigned char * ) data ) [1] << 8;
- case 1:
- h2 ^= ( ( unsigned char * ) data ) [0];
- h2 *= m;
- };
-
- h1 ^= h2 >> 18;
-
- h1 *= m;
-
- h2 ^= h1 >> 22;
-
- h2 *= m;
-
- h1 ^= h2 >> 17;
-
- h1 *= m;
-
- h2 ^= h1 >> 19;
-
- h2 *= m;
-
- uint64_t h = h1;
-
- h = ( h << 32 ) | h2;
-
- return h;
+ const unsigned int m = 0x5bd1e995;
+ const int r = 24;
+ unsigned int h1 = seed ^ len;
+ unsigned int h2 = 0;
+ const unsigned int *data = ( const unsigned int * ) key;
+
+ while ( len >= 8 )
+ {
+ unsigned int k1 = *data++;
+ k1 *= m;
+ k1 ^= k1 >> r;
+ k1 *= m;
+ h1 *= m;
+ h1 ^= k1;
+ len -= 4;
+ unsigned int k2 = *data++;
+ k2 *= m;
+ k2 ^= k2 >> r;
+ k2 *= m;
+ h2 *= m;
+ h2 ^= k2;
+ len -= 4;
+ }
+
+ if ( len >= 4 )
+ {
+ unsigned int k1 = *data++;
+ k1 *= m;
+ k1 ^= k1 >> r;
+ k1 *= m;
+ h1 *= m;
+ h1 ^= k1;
+ len -= 4;
+ }
+
+ switch ( len )
+ {
+ case 3:
+ h2 ^= ( ( unsigned char * ) data ) [2] << 16;
+
+ case 2:
+ h2 ^= ( ( unsigned char * ) data ) [1] << 8;
+
+ case 1:
+ h2 ^= ( ( unsigned char * ) data ) [0];
+ h2 *= m;
+ };
+
+ h1 ^= h2 >> 18;
+
+ h1 *= m;
+
+ h2 ^= h1 >> 22;
+
+ h2 *= m;
+
+ h1 ^= h2 >> 17;
+
+ h1 *= m;
+
+ h2 ^= h1 >> 19;
+
+ h2 *= m;
+
+ uint64_t h = h1;
+
+ h = ( h << 32 ) | h2;
+
+ return h;
}
#endif
diff --git a/sparsePregraph/inc/sparse_kmer.h b/sparsePregraph/inc/sparse_kmer.h
index d013ded..3179f2e 100644
--- a/sparsePregraph/inc/sparse_kmer.h
+++ b/sparsePregraph/inc/sparse_kmer.h
@@ -1,7 +1,7 @@
/*
* inc/sparse_kmer.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -25,294 +25,299 @@
#include "seq_util.h"
-inline void initKmerFilter ( int K_size, kmer_t2 * kmer_filter )
+inline void initKmerFilter ( int K_size, kmer_t2 *kmer_filter )
{
#ifdef _63MER_
- ( kmer_filter->kmer ) [0] = 0;
- ( kmer_filter->kmer ) [1] = 1LU;
- L_shift_NC ( kmer_filter->kmer, K_size * 2, 2 );
-
- if ( K_size <= 31 )
- {
- ( kmer_filter->kmer ) [1] -= 1; //
- }
- else
- {
- ( kmer_filter->kmer ) [0] -= 1; // K_size = 32 is also ok ..
- ( kmer_filter->kmer ) [1] = -1; //fff..
- }
+ ( kmer_filter->kmer ) [0] = 0;
+ ( kmer_filter->kmer ) [1] = 1LU;
+ L_shift_NC ( kmer_filter->kmer, K_size * 2, 2 );
+
+ if ( K_size <= 31 )
+ {
+ ( kmer_filter->kmer ) [1] -= 1; //
+ }
+ else
+ {
+ ( kmer_filter->kmer ) [0] -= 1; // K_size = 32 is also ok ..
+ ( kmer_filter->kmer ) [1] = -1; //fff..
+ }
#endif
#ifdef _127MER_
- memset ( kmer_filter->kmer, 0, sizeof ( kmer_t2 ) );
- ( kmer_filter->kmer ) [3] = 1LU;
- L_shift_NC ( kmer_filter->kmer, K_size * 2, 4 );
-
- if ( K_size <= 31 )
- {
- ( kmer_filter->kmer ) [3] -= 1;
- }
- else if ( K_size <= 63 )
- {
- ( kmer_filter->kmer ) [2] -= 1;
- ( kmer_filter->kmer ) [3] = -1;
- }
- else if ( K_size <= 95 )
- {
- ( kmer_filter->kmer ) [1] -= 1;
- ( kmer_filter->kmer ) [2] = -1;
- ( kmer_filter->kmer ) [3] = -1;
- }
- else if ( K_size <= 127 )
- {
- ( kmer_filter->kmer ) [0] -= 1;
- ( kmer_filter->kmer ) [1] = -1;
- ( kmer_filter->kmer ) [2] = -1;
- ( kmer_filter->kmer ) [3] = -1;
- }
+ memset ( kmer_filter->kmer, 0, sizeof ( kmer_t2 ) );
+ ( kmer_filter->kmer ) [3] = 1LU;
+ L_shift_NC ( kmer_filter->kmer, K_size * 2, 4 );
+
+ if ( K_size <= 31 )
+ {
+ ( kmer_filter->kmer ) [3] -= 1;
+ }
+ else if ( K_size <= 63 )
+ {
+ ( kmer_filter->kmer ) [2] -= 1;
+ ( kmer_filter->kmer ) [3] = -1;
+ }
+ else if ( K_size <= 95 )
+ {
+ ( kmer_filter->kmer ) [1] -= 1;
+ ( kmer_filter->kmer ) [2] = -1;
+ ( kmer_filter->kmer ) [3] = -1;
+ }
+ else if ( K_size <= 127 )
+ {
+ ( kmer_filter->kmer ) [0] -= 1;
+ ( kmer_filter->kmer ) [1] = -1;
+ ( kmer_filter->kmer ) [2] = -1;
+ ( kmer_filter->kmer ) [3] = -1;
+ }
#endif
}
-inline void kmerAnd ( kmer_t2 * k1, kmer_t2 * k2 ) //change k1
+inline void kmerAnd ( kmer_t2 *k1, kmer_t2 *k2 ) //change k1
{
#ifdef _63MER_
- ( k1->kmer ) [0] &= ( k2->kmer ) [0];
- ( k1->kmer ) [1] &= ( k2->kmer ) [1];
+ ( k1->kmer ) [0] &= ( k2->kmer ) [0];
+ ( k1->kmer ) [1] &= ( k2->kmer ) [1];
#endif
#ifdef _127MER_
- ( k1->kmer ) [0] &= ( k2->kmer ) [0];
- ( k1->kmer ) [1] &= ( k2->kmer ) [1];
- ( k1->kmer ) [2] &= ( k2->kmer ) [2];
- ( k1->kmer ) [3] &= ( k2->kmer ) [3];
+ ( k1->kmer ) [0] &= ( k2->kmer ) [0];
+ ( k1->kmer ) [1] &= ( k2->kmer ) [1];
+ ( k1->kmer ) [2] &= ( k2->kmer ) [2];
+ ( k1->kmer ) [3] &= ( k2->kmer ) [3];
#endif
}
-inline void kmerOr ( kmer_t2 * k1, kmer_t2 * k2 ) //change k1
+inline void kmerOr ( kmer_t2 *k1, kmer_t2 *k2 ) //change k1
{
#ifdef _63MER_
- ( k1->kmer ) [0] |= ( k2->kmer ) [0];
- ( k1->kmer ) [1] |= ( k2->kmer ) [1];
+ ( k1->kmer ) [0] |= ( k2->kmer ) [0];
+ ( k1->kmer ) [1] |= ( k2->kmer ) [1];
#endif
#ifdef _127MER_
- ( k1->kmer ) [0] |= ( k2->kmer ) [0];
- ( k1->kmer ) [1] |= ( k2->kmer ) [1];
- ( k1->kmer ) [2] |= ( k2->kmer ) [2];
- ( k1->kmer ) [3] |= ( k2->kmer ) [3];
+ ( k1->kmer ) [0] |= ( k2->kmer ) [0];
+ ( k1->kmer ) [1] |= ( k2->kmer ) [1];
+ ( k1->kmer ) [2] |= ( k2->kmer ) [2];
+ ( k1->kmer ) [3] |= ( k2->kmer ) [3];
#endif
}
-inline void kmerMoveRight ( kmer_t2 * kmer, int base_num )
+inline void kmerMoveRight ( kmer_t2 *kmer, int base_num )
{
#ifdef _63MER_
- R_shift_NC ( kmer->kmer, base_num * 2, 2 );
+ R_shift_NC ( kmer->kmer, base_num * 2, 2 );
#endif
#ifdef _127MER_
- R_shift_NC ( kmer->kmer, base_num * 2, 4 ); // has move 32 bug
+ R_shift_NC ( kmer->kmer, base_num * 2, 4 ); // has move 32 bug
#endif
}
-inline void kmerMoveLeft ( kmer_t2 * kmer, int base_num )
+inline void kmerMoveLeft ( kmer_t2 *kmer, int base_num )
{
#ifdef _63MER_
- L_shift_NC ( kmer->kmer, base_num * 2, 2 );
+ L_shift_NC ( kmer->kmer, base_num * 2, 2 );
#endif
#ifdef _127MER_
- L_shift_NC ( kmer->kmer, base_num * 2, 4 );
+ L_shift_NC ( kmer->kmer, base_num * 2, 4 );
#endif
}
-inline int kmerCompare ( kmer_t2 * k1, kmer_t2 * k2 )
+inline int kmerCompare ( kmer_t2 *k1, kmer_t2 *k2 )
{
#ifdef _63MER_
- return uint64_t_cmp ( k1->kmer, k2->kmer, 2 );
+ return uint64_t_cmp ( k1->kmer, k2->kmer, 2 );
#endif
#ifdef _127MER_
- return uint64_t_cmp ( k1->kmer, k2->kmer, 4 );
+ return uint64_t_cmp ( k1->kmer, k2->kmer, 4 );
#endif
}
-inline void reverseCompKmer ( kmer_t2 * kmer , int K_size ) //result stored in *kmer ...
+inline void reverseCompKmer ( kmer_t2 *kmer , int K_size ) //result stored in *kmer ...
{
#ifdef _63MER_
- get_rev_comp_seq_arr ( kmer->kmer, K_size, 2 );
+ get_rev_comp_seq_arr ( kmer->kmer, K_size, 2 );
#endif
#ifdef _127MER_
- get_rev_comp_seq_arr ( kmer->kmer, K_size, 4 );
+ get_rev_comp_seq_arr ( kmer->kmer, K_size, 4 );
#endif
}
-inline bool isSmallerKmer ( kmer_t2 * kmer, int K_size )
+inline bool isSmallerKmer ( kmer_t2 *kmer, int K_size )
{
- kmer_t2 f_kmer;
- f_kmer = *kmer;
- reverseCompKmer ( &f_kmer, K_size );
+ kmer_t2 f_kmer;
+ f_kmer = *kmer;
+ reverseCompKmer ( &f_kmer, K_size );
- if ( kmerCompare ( kmer, &f_kmer ) < 0 )
- {
- return 1;
- }
+ if ( kmerCompare ( kmer, &f_kmer ) < 0 )
+ {
+ return 1;
+ }
- return 0;
+ return 0;
}
-inline void get_kmer_from_seq ( const char * seq, int len, int K_size, int pos, kmer_t2 * kmer )
+inline void get_kmer_from_seq ( const char *seq, int len, int K_size, int pos, kmer_t2 *kmer )
{
- if ( pos + K_size > len )
- {
- fprintf ( stderr, "ERROR: get_kmer position is invalid!\n" );
- exit ( 1 );
- }
-
- int start = pos, end = pos + K_size, index;
- uint64_t * arr_ptr = kmer->kmer;
- memset ( arr_ptr, 0, sizeof ( kmer_t2 ) );
- int arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
- int i = 0;
- uint64_t tmp = 0;
-
- for ( index = start, i = 0; index < end; index++, i++ )
- {
- switch ( seq[index] )
- {
- case 'A':
- tmp = tmp << 2;
- break;
- case 'C':
- tmp = ( tmp << 2 ) | 1;
- break;
- case 'G':
- tmp = ( tmp << 2 ) | 2;
- break;
- case 'T':
- tmp = ( tmp << 2 ) | 3;
- break;
- case 'N':
- tmp = ( tmp << 2 ) | 2; //treat N as G
- break;
- default:
- tmp = ( tmp << 2 ) | 2; // treat unknown char as G, 'S'
- fprintf ( stderr, "WARNING: get_kmer_from_seq process unknown char %c\n", seq[index] );
- //exit(1);
- }
-
- if ( ( i + 1 ) % 32 == 0 ) //tmp is full, tmp has stored 32 bases
- {
- arr_ptr[i / 32] = tmp;
- tmp = 0;
- }
- }
-
- if ( i != K_size )
- {
- fprintf ( stderr, "ERROR: i %d is K_size \n", i );
- }
-
- if ( K_size <= 31 )
- {
- arr_ptr[arr_sz - 1] = tmp;
- }
- else //if(K_size%32 != 0){ //absolute ..because K is odd
- {
- int left_bits = ( 32 - K_size % 32 ) * 2;
- tmp = tmp << left_bits;
- arr_ptr[K_size / 32] = tmp;
- kmerMoveRight ( kmer, 32 * arr_sz - K_size );
- }
-
- /*
-
- uint64_t high=0,low=0;
- int high_start=0,high_end=0;
- int low_satrt=0,low_end=0;
-
- if(K_size >= 33){
- high_start = pos;
- high_end = pos+K_size-32;
-
- low_satrt = high_end;
- low_end = low_satrt + 32;
- }else{
- high_start = high_end = pos;
- low_satrt = pos;
- low_end = pos+K_size;
- }
-
- //debug<<"kmer ";
- for(int i=high_start;i<high_end;++i){ //dif from soapdenovo
- //debug<<seq[i];
- switch(seq[i]){
- case 'A':
- high = high<< 2;
- break;
- case 'C':
- high = (high << 2)|1;
- break;
- case 'G':
- high = (high << 2)|2;
- break;
- case 'T':
- high = (high << 2)|3;
- break;
- case 'N':
- high = (high << 2)|2;//treat N as G as same as soapdenovo
- //debug_build<<"N occured at "<<i<<endl;
- break;
- default:
- printf("error in process unknown char %c\n",seq[i]);
- exit(1);
- }
- }
-
- //debug<<" ";
-
- for(int i=low_satrt;i<low_end;++i){ //dif from soapdenovo
- //debug<<seq[i];
- switch(seq[i]){
- case 'A':
- low= low<< 2;
- break;
- case 'C':
- low = (low << 2)|1;
- break;
- case 'G':
- low = (low << 2)|2;
- break;
- case 'T':
- low = (low << 2)|3;
- break;
- case 'N':
- low = (low << 2)|2;//treat N as G as same as soapdenovo
- //debug_build<<"N occured at "<<i<<endl;
- break;
- default:
- printf("error in process unknown char %c\n",seq[i]);
- exit(1);
- }
- }
-
- kmer[0]=high;
- kmer[1]=low;
-
- */
+ if ( pos + K_size > len )
+ {
+ fprintf ( stderr, "ERROR: get_kmer position is invalid!\n" );
+ exit ( 1 );
+ }
+
+ int start = pos, end = pos + K_size, index;
+ uint64_t *arr_ptr = kmer->kmer;
+ memset ( arr_ptr, 0, sizeof ( kmer_t2 ) );
+ int arr_sz = sizeof ( kmer_t2 ) / sizeof ( uint64_t );
+ int i = 0;
+ uint64_t tmp = 0;
+
+ for ( index = start, i = 0; index < end; index++, i++ )
+ {
+ switch ( seq[index] )
+ {
+ case 'A':
+ tmp = tmp << 2;
+ break;
+
+ case 'C':
+ tmp = ( tmp << 2 ) | 1;
+ break;
+
+ case 'G':
+ tmp = ( tmp << 2 ) | 2;
+ break;
+
+ case 'T':
+ tmp = ( tmp << 2 ) | 3;
+ break;
+
+ case 'N':
+ tmp = ( tmp << 2 ) | 2; //treat N as G
+ break;
+
+ default:
+ tmp = ( tmp << 2 ) | 2; // treat unknown char as G, 'S'
+ fprintf ( stderr, "WARNING: get_kmer_from_seq process unknown char %c\n", seq[index] );
+ //exit(1);
+ }
+
+ if ( ( i + 1 ) % 32 == 0 ) //tmp is full, tmp has stored 32 bases
+ {
+ arr_ptr[i / 32] = tmp;
+ tmp = 0;
+ }
+ }
+
+ if ( i != K_size )
+ {
+ fprintf ( stderr, "ERROR: i %d is K_size \n", i );
+ }
+
+ if ( K_size <= 31 )
+ {
+ arr_ptr[arr_sz - 1] = tmp;
+ }
+ else //if(K_size%32 != 0){ //absolute ..because K is odd
+ {
+ int left_bits = ( 32 - K_size % 32 ) * 2;
+ tmp = tmp << left_bits;
+ arr_ptr[K_size / 32] = tmp;
+ kmerMoveRight ( kmer, 32 * arr_sz - K_size );
+ }
+
+ /*
+
+ uint64_t high=0,low=0;
+ int high_start=0,high_end=0;
+ int low_satrt=0,low_end=0;
+
+ if(K_size >= 33){
+ high_start = pos;
+ high_end = pos+K_size-32;
+
+ low_satrt = high_end;
+ low_end = low_satrt + 32;
+ }else{
+ high_start = high_end = pos;
+ low_satrt = pos;
+ low_end = pos+K_size;
+ }
+
+ //debug<<"kmer ";
+ for(int i=high_start;i<high_end;++i){ //dif from soapdenovo
+ //debug<<seq[i];
+ switch(seq[i]){
+ case 'A':
+ high = high<< 2;
+ break;
+ case 'C':
+ high = (high << 2)|1;
+ break;
+ case 'G':
+ high = (high << 2)|2;
+ break;
+ case 'T':
+ high = (high << 2)|3;
+ break;
+ case 'N':
+ high = (high << 2)|2;//treat N as G as same as soapdenovo
+ //debug_build<<"N occured at "<<i<<endl;
+ break;
+ default:
+ printf("error in process unknown char %c\n",seq[i]);
+ exit(1);
+ }
+ }
+
+ //debug<<" ";
+
+ for(int i=low_satrt;i<low_end;++i){ //dif from soapdenovo
+ //debug<<seq[i];
+ switch(seq[i]){
+ case 'A':
+ low= low<< 2;
+ break;
+ case 'C':
+ low = (low << 2)|1;
+ break;
+ case 'G':
+ low = (low << 2)|2;
+ break;
+ case 'T':
+ low = (low << 2)|3;
+ break;
+ case 'N':
+ low = (low << 2)|2;//treat N as G as same as soapdenovo
+ //debug_build<<"N occured at "<<i<<endl;
+ break;
+ default:
+ printf("error in process unknown char %c\n",seq[i]);
+ exit(1);
+ }
+ }
+
+ kmer[0]=high;
+ kmer[1]=low;
+
+ */
}
-inline void printKmer ( const kmer_t2 * kmer, FILE * fp )
+inline void printKmer ( const kmer_t2 *kmer, FILE *fp )
{
#ifdef _63MER_
- fprintf ( fp, "%llx %llx ,\n", ( kmer->kmer ) [0], ( kmer->kmer ) [1] );
+ fprintf ( fp, "%llx %llx ,\n", ( kmer->kmer ) [0], ( kmer->kmer ) [1] );
#endif
#ifdef _127MER_
- fprintf ( fp, "%llx %llx %llx %llx,\n", ( kmer->kmer ) [0], ( kmer->kmer ) [1], ( kmer->kmer ) [2], ( kmer->kmer ) [3] );
+ fprintf ( fp, "%llx %llx %llx %llx,\n", ( kmer->kmer ) [0], ( kmer->kmer ) [1], ( kmer->kmer ) [2], ( kmer->kmer ) [3] );
#endif
}
-inline void printKmerSeq ( kmer_t2 * kmer, int K_size, FILE * fp ) //TODO printf ATCG
+inline void printKmerSeq ( kmer_t2 *kmer, int K_size, FILE *fp ) //TODO printf ATCG
{
- char str[128];
- bitsarr2str ( kmer->kmer, K_size, str, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
- fprintf ( fp, "%s ,\n", str );
+ char str[128];
+ bitsarr2str ( kmer->kmer, K_size, str, sizeof ( kmer_t2 ) / sizeof ( uint64_t ) );
+ fprintf ( fp, "%s ,\n", str );
}
#endif
diff --git a/sparsePregraph/inc/stdinc.h b/sparsePregraph/inc/stdinc.h
index 5cd64c0..3a02528 100644
--- a/sparsePregraph/inc/stdinc.h
+++ b/sparsePregraph/inc/stdinc.h
@@ -1,7 +1,7 @@
/*
* inc/stdinc.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
diff --git a/sparsePregraph/inc/xcurses.h b/sparsePregraph/inc/xcurses.h
index 45e1359..ff01bdf 100644
--- a/sparsePregraph/inc/xcurses.h
+++ b/sparsePregraph/inc/xcurses.h
@@ -54,11 +54,11 @@ extern "C"
# define bool _bool
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Manifest Constants
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Manifest Constants
+ *
+ */
#ifndef FALSE
# define FALSE 0
@@ -76,43 +76,43 @@ extern "C"
# define OK 0
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Type Declarations
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Type Declarations
+ *
+ */
- typedef unsigned char bool; /* PDCurses Boolean type */
+typedef unsigned char bool; /* PDCurses Boolean type */
#ifdef CHTYPE_LONG
# if _LP64
- typedef unsigned int chtype;
+typedef unsigned int chtype;
# else
- typedef unsigned long chtype; /* 16-bit attr + 16-bit char */
+typedef unsigned long chtype; /* 16-bit attr + 16-bit char */
# endif
#else
- typedef unsigned short chtype; /* 8-bit attr + 8-bit char */
+typedef unsigned short chtype; /* 8-bit attr + 8-bit char */
#endif
#ifdef PDC_WIDE
- typedef chtype cchar_t;
+typedef chtype cchar_t;
#endif
- typedef chtype attr_t;
+typedef chtype attr_t;
- /*----------------------------------------------------------------------
- *
- * PDCurses Mouse Interface -- SYSVR4, with extensions
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Mouse Interface -- SYSVR4, with extensions
+ *
+ */
- typedef struct
- {
- int x; /* absolute column, 0 based, measured in characters */
- int y; /* absolute row, 0 based, measured in characters */
- short button[3]; /* state of each button */
- int changes; /* flags indicating what has changed with the mouse */
- } MOUSE_STATUS;
+typedef struct
+{
+ int x; /* absolute column, 0 based, measured in characters */
+ int y; /* absolute row, 0 based, measured in characters */
+ short button[3]; /* state of each button */
+ int changes; /* flags indicating what has changed with the mouse */
+} MOUSE_STATUS;
#define BUTTON_RELEASED 0x0000
#define BUTTON_PRESSED 0x0001
@@ -131,18 +131,18 @@ extern "C"
#define MOUSE_X_POS (Mouse_status.x)
#define MOUSE_Y_POS (Mouse_status.y)
- /*
- * Bits associated with the .changes field:
- * 3 2 1 0
- * 210987654321098765432109876543210
- * 1 <- button 1 has changed
- * 10 <- button 2 has changed
- * 100 <- button 3 has changed
- * 1000 <- mouse has moved
- * 10000 <- mouse position report
- * 100000 <- mouse wheel up
- * 1000000 <- mouse wheel down
- */
+/*
+ * Bits associated with the .changes field:
+ * 3 2 1 0
+ * 210987654321098765432109876543210
+ * 1 <- button 1 has changed
+ * 10 <- button 2 has changed
+ * 100 <- button 3 has changed
+ * 1000 <- mouse has moved
+ * 10000 <- mouse position report
+ * 100000 <- mouse wheel up
+ * 1000000 <- mouse wheel down
+ */
#define PDC_MOUSE_MOVED 0x0008
#define PDC_MOUSE_POSITION 0x0010
@@ -157,7 +157,7 @@ extern "C"
#define MOUSE_WHEEL_UP (Mouse_status.changes & PDC_MOUSE_WHEEL_UP)
#define MOUSE_WHEEL_DOWN (Mouse_status.changes & PDC_MOUSE_WHEEL_DOWN)
- /* mouse bit-masks */
+/* mouse bit-masks */
#define BUTTON1_RELEASED 0x00000001L
#define BUTTON1_PRESSED 0x00000002L
@@ -180,9 +180,9 @@ extern "C"
#define BUTTON3_TRIPLE_CLICKED 0x00004000L
#define BUTTON3_MOVED 0x00004000L /* PDCurses */
- /* For the ncurses-compatible functions only, BUTTON4_PRESSED and
- BUTTON5_PRESSED are returned for mouse scroll wheel up and down;
- otherwise PDCurses doesn't support buttons 4 and 5 */
+/* For the ncurses-compatible functions only, BUTTON4_PRESSED and
+ BUTTON5_PRESSED are returned for mouse scroll wheel up and down;
+ otherwise PDCurses doesn't support buttons 4 and 5 */
#define BUTTON4_RELEASED 0x00008000L
#define BUTTON4_PRESSED 0x00010000L
@@ -204,17 +204,17 @@ extern "C"
#define ALL_MOUSE_EVENTS 0x1fffffffL
#define REPORT_MOUSE_POSITION 0x20000000L
- /* ncurses mouse interface */
+/* ncurses mouse interface */
- typedef unsigned long mmask_t;
+typedef unsigned long mmask_t;
- typedef struct
- {
- short id; /* unused, always 0 */
- int x, y, z; /* x, y same as MOUSE_STATUS; z unused */
- mmask_t bstate; /* equivalent to changes + button[], but
+typedef struct
+{
+ short id; /* unused, always 0 */
+ int x, y, z; /* x, y same as MOUSE_STATUS; z unused */
+ mmask_t bstate; /* equivalent to changes + button[], but
in the same format as used for mousemask() */
- } MEVENT;
+} MEVENT;
#ifdef NCURSES_MOUSE_VERSION
# define BUTTON_SHIFT BUTTON_MODIFIER_SHIFT
@@ -227,103 +227,103 @@ extern "C"
# define BUTTON_ALT PDC_BUTTON_ALT
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Structure Definitions
- *
- */
-
- typedef struct _win /* definition of a window */
- {
- int _cury; /* current pseudo-cursor */
- int _curx;
- int _maxy; /* max window coordinates */
- int _maxx;
- int _begy; /* origin on screen */
- int _begx;
- int _flags; /* window properties */
- chtype _attrs; /* standard attributes and colors */
- chtype _bkgd; /* background, normally blank */
- bool _clear; /* causes clear at next refresh */
- bool _leaveit; /* leaves cursor where it is */
- bool _scroll; /* allows window scrolling */
- bool _nodelay; /* input character wait flag */
- bool _immed; /* immediate update flag */
- bool _sync; /* synchronise window ancestors */
- bool _use_keypad; /* flags keypad key mode active */
- chtype ** _y; /* pointer to line pointer array */
- int * _firstch; /* first changed character in line */
- int * _lastch; /* last changed character in line */
- int _tmarg; /* top of scrolling region */
- int _bmarg; /* bottom of scrolling region */
- int _delayms; /* milliseconds of delay for getch() */
- int _parx, _pary; /* coords relative to parent (0,0) */
- struct _win * _parent; /* subwin's pointer to parent win */
- } WINDOW;
-
- /* Avoid using the SCREEN struct directly -- use the corresponding
- functions if possible. This struct may eventually be made private. */
-
- typedef struct
- {
- bool alive; /* if initscr() called, and not endwin() */
- bool autocr; /* if cr -> lf */
- bool cbreak; /* if terminal unbuffered */
- bool echo; /* if terminal echo */
- bool raw_inp; /* raw input mode (v. cooked input) */
- bool raw_out; /* raw output mode (7 v. 8 bits) */
- bool audible; /* FALSE if the bell is visual */
- bool mono; /* TRUE if current screen is mono */
- bool resized; /* TRUE if TERM has been resized */
- bool orig_attr; /* TRUE if we have the original colors */
- short orig_fore; /* original screen foreground color */
- short orig_back; /* original screen foreground color */
- int cursrow; /* position of physical cursor */
- int curscol; /* position of physical cursor */
- int visibility; /* visibility of cursor */
- int orig_cursor; /* original cursor size */
- int lines; /* new value for LINES */
- int cols; /* new value for COLS */
- unsigned long _trap_mbe; /* trap these mouse button events */
- unsigned long _map_mbe_to_key; /* map mouse buttons to slk */
- int mouse_wait; /* time to wait (in ms) for a
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Structure Definitions
+ *
+ */
+
+typedef struct _win /* definition of a window */
+{
+ int _cury; /* current pseudo-cursor */
+ int _curx;
+ int _maxy; /* max window coordinates */
+ int _maxx;
+ int _begy; /* origin on screen */
+ int _begx;
+ int _flags; /* window properties */
+ chtype _attrs; /* standard attributes and colors */
+ chtype _bkgd; /* background, normally blank */
+ bool _clear; /* causes clear at next refresh */
+ bool _leaveit; /* leaves cursor where it is */
+ bool _scroll; /* allows window scrolling */
+ bool _nodelay; /* input character wait flag */
+ bool _immed; /* immediate update flag */
+ bool _sync; /* synchronise window ancestors */
+ bool _use_keypad; /* flags keypad key mode active */
+ chtype **_y; /* pointer to line pointer array */
+ int *_firstch; /* first changed character in line */
+ int *_lastch; /* last changed character in line */
+ int _tmarg; /* top of scrolling region */
+ int _bmarg; /* bottom of scrolling region */
+ int _delayms; /* milliseconds of delay for getch() */
+ int _parx, _pary; /* coords relative to parent (0,0) */
+ struct _win *_parent; /* subwin's pointer to parent win */
+} WINDOW;
+
+/* Avoid using the SCREEN struct directly -- use the corresponding
+ functions if possible. This struct may eventually be made private. */
+
+typedef struct
+{
+ bool alive; /* if initscr() called, and not endwin() */
+ bool autocr; /* if cr -> lf */
+ bool cbreak; /* if terminal unbuffered */
+ bool echo; /* if terminal echo */
+ bool raw_inp; /* raw input mode (v. cooked input) */
+ bool raw_out; /* raw output mode (7 v. 8 bits) */
+ bool audible; /* FALSE if the bell is visual */
+ bool mono; /* TRUE if current screen is mono */
+ bool resized; /* TRUE if TERM has been resized */
+ bool orig_attr; /* TRUE if we have the original colors */
+ short orig_fore; /* original screen foreground color */
+ short orig_back; /* original screen foreground color */
+ int cursrow; /* position of physical cursor */
+ int curscol; /* position of physical cursor */
+ int visibility; /* visibility of cursor */
+ int orig_cursor; /* original cursor size */
+ int lines; /* new value for LINES */
+ int cols; /* new value for COLS */
+ unsigned long _trap_mbe; /* trap these mouse button events */
+ unsigned long _map_mbe_to_key; /* map mouse buttons to slk */
+ int mouse_wait; /* time to wait (in ms) for a
button release after a press, in
order to count it as a click */
- int slklines; /* lines in use by slk_init() */
- WINDOW * slk_winptr; /* window for slk */
- int linesrippedoff; /* lines ripped off via ripoffline() */
- int linesrippedoffontop; /* lines ripped off on
+ int slklines; /* lines in use by slk_init() */
+ WINDOW *slk_winptr; /* window for slk */
+ int linesrippedoff; /* lines ripped off via ripoffline() */
+ int linesrippedoffontop; /* lines ripped off on
top via ripoffline() */
- int delaytenths; /* 1/10ths second to wait block
+ int delaytenths; /* 1/10ths second to wait block
getch() for */
- bool _preserve; /* TRUE if screen background
+ bool _preserve; /* TRUE if screen background
to be preserved */
- int _restore; /* specifies if screen background
+ int _restore; /* specifies if screen background
to be restored, and how */
- bool save_key_modifiers; /* TRUE if each key modifiers saved
+ bool save_key_modifiers; /* TRUE if each key modifiers saved
with each key press */
- bool return_key_modifiers; /* TRUE if modifier keys are
+ bool return_key_modifiers; /* TRUE if modifier keys are
returned as "real" keys */
- bool key_code; /* TRUE if last key is a special key;
+ bool key_code; /* TRUE if last key is a special key;
used internally by get_wch() */
#ifdef XCURSES
- int XcurscrSize; /* size of Xcurscr shared memory block */
- bool sb_on;
- int sb_viewport_y;
- int sb_viewport_x;
- int sb_total_y;
- int sb_total_x;
- int sb_cur_y;
- int sb_cur_x;
+ int XcurscrSize; /* size of Xcurscr shared memory block */
+ bool sb_on;
+ int sb_viewport_y;
+ int sb_viewport_x;
+ int sb_total_y;
+ int sb_total_x;
+ int sb_cur_y;
+ int sb_cur_x;
#endif
- short line_color; /* color of line attributes - default -1 */
- } SCREEN;
+ short line_color; /* color of line attributes - default -1 */
+} SCREEN;
- /*----------------------------------------------------------------------
- *
- * PDCurses External Variables
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses External Variables
+ *
+ */
#ifdef PDC_DLL_BUILD
# ifdef CURSES_LIBRARY
@@ -335,60 +335,60 @@ extern "C"
# define PDCEX extern
#endif
- PDCEX int LINES; /* terminal height */
- PDCEX int COLS; /* terminal width */
- PDCEX WINDOW * stdscr; /* the default screen window */
- PDCEX WINDOW * curscr; /* the current screen image */
- PDCEX SCREEN * SP; /* curses variables */
- PDCEX MOUSE_STATUS Mouse_status;
- PDCEX int COLORS;
- PDCEX int COLOR_PAIRS;
- PDCEX int TABSIZE;
- PDCEX chtype acs_map[]; /* alternate character set map */
- PDCEX char ttytype[]; /* terminal name/description */
+PDCEX int LINES; /* terminal height */
+PDCEX int COLS; /* terminal width */
+PDCEX WINDOW *stdscr; /* the default screen window */
+PDCEX WINDOW *curscr; /* the current screen image */
+PDCEX SCREEN *SP; /* curses variables */
+PDCEX MOUSE_STATUS Mouse_status;
+PDCEX int COLORS;
+PDCEX int COLOR_PAIRS;
+PDCEX int TABSIZE;
+PDCEX chtype acs_map[]; /* alternate character set map */
+PDCEX char ttytype[]; /* terminal name/description */
- /*man-start**************************************************************
+/*man-start**************************************************************
- PDCurses Text Attributes
- ========================
+PDCurses Text Attributes
+========================
- Originally, PDCurses used a short (16 bits) for its chtype. To include
- color, a number of things had to be sacrificed from the strict Unix and
- System V support. The main problem was fitting all character attributes
- and color into an unsigned char (all 8 bits!).
+Originally, PDCurses used a short (16 bits) for its chtype. To include
+color, a number of things had to be sacrificed from the strict Unix and
+System V support. The main problem was fitting all character attributes
+and color into an unsigned char (all 8 bits!).
- Today, PDCurses by default uses a long (32 bits) for its chtype, as in
- System V. The short chtype is still available, by undefining CHTYPE_LONG
- and rebuilding the library.
+Today, PDCurses by default uses a long (32 bits) for its chtype, as in
+System V. The short chtype is still available, by undefining CHTYPE_LONG
+and rebuilding the library.
- The following is the structure of a win->_attrs chtype:
+The following is the structure of a win->_attrs chtype:
- short form:
+short form:
- -------------------------------------------------
- |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
- -------------------------------------------------
- color number | attrs | character eg 'a'
+-------------------------------------------------
+|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
+-------------------------------------------------
+ color number | attrs | character eg 'a'
- The available non-color attributes are bold, reverse and blink. Others
- have no effect. The high order char is an index into an array of
- physical colors (defined in color.c) -- 32 foreground/background color
- pairs (5 bits) plus 3 bits for other attributes.
+The available non-color attributes are bold, reverse and blink. Others
+have no effect. The high order char is an index into an array of
+physical colors (defined in color.c) -- 32 foreground/background color
+pairs (5 bits) plus 3 bits for other attributes.
- long form:
+long form:
- ----------------------------------------------------------------------------
- |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|..| 3| 2| 1| 0|
- ----------------------------------------------------------------------------
- color number | modifiers | character eg 'a'
+----------------------------------------------------------------------------
+|31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|..| 3| 2| 1| 0|
+----------------------------------------------------------------------------
+ color number | modifiers | character eg 'a'
- The available non-color attributes are bold, underline, invisible,
- right-line, left-line, protect, reverse and blink. 256 color pairs (8
- bits), 8 bits for other attributes, and 16 bits for character data.
+The available non-color attributes are bold, underline, invisible,
+right-line, left-line, protect, reverse and blink. 256 color pairs (8
+bits), 8 bits for other attributes, and 16 bits for character data.
- **man-end****************************************************************/
+**man-end****************************************************************/
- /*** Video attribute macros ***/
+/*** Video attribute macros ***/
#define A_NORMAL (chtype)0
@@ -440,8 +440,8 @@ extern "C"
#define ATR_MSK A_ATTRIBUTES /* Obsolete */
#define ATR_NRM A_NORMAL /* Obsolete */
- /* For use with attr_t -- X/Open says, "these shall be distinct", so
- this is a non-conforming implementation. */
+/* For use with attr_t -- X/Open says, "these shall be distinct", so
+ this is a non-conforming implementation. */
#define WA_ALTCHARSET A_ALTCHARSET
#define WA_BLINK A_BLINK
@@ -460,11 +460,11 @@ extern "C"
#define WA_TOP A_NORMAL
#define WA_VERTICAL A_NORMAL
- /*** Alternate character set macros ***/
+/*** Alternate character set macros ***/
- /* 'w' = 32-bit chtype; acs_map[] index | A_ALTCHARSET
- 'n' = 16-bit chtype; it gets the fallback set because no bit is
- available for A_ALTCHARSET */
+/* 'w' = 32-bit chtype; acs_map[] index | A_ALTCHARSET
+ 'n' = 16-bit chtype; it gets the fallback set because no bit is
+ available for A_ALTCHARSET */
#ifdef CHTYPE_LONG
# define ACS_PICK(w, n) ((chtype)w | A_ALTCHARSET)
@@ -472,7 +472,7 @@ extern "C"
# define ACS_PICK(w, n) ((chtype)n)
#endif
- /* VT100-compatible symbols -- box chars */
+/* VT100-compatible symbols -- box chars */
#define ACS_ULCORNER ACS_PICK('l', '+')
#define ACS_LLCORNER ACS_PICK('m', '+')
@@ -486,7 +486,7 @@ extern "C"
#define ACS_VLINE ACS_PICK('x', '|')
#define ACS_PLUS ACS_PICK('n', '+')
- /* VT100-compatible symbols -- other */
+/* VT100-compatible symbols -- other */
#define ACS_S1 ACS_PICK('o', '-')
#define ACS_S9 ACS_PICK('s', '_')
@@ -496,9 +496,9 @@ extern "C"
#define ACS_PLMINUS ACS_PICK('g', '#')
#define ACS_BULLET ACS_PICK('~', 'o')
- /* Teletype 5410v1 symbols -- these are defined in SysV curses, but
- are not well-supported by most terminals. Stick to VT100 characters
- for optimum portability. */
+/* Teletype 5410v1 symbols -- these are defined in SysV curses, but
+ are not well-supported by most terminals. Stick to VT100 characters
+ for optimum portability. */
#define ACS_LARROW ACS_PICK(',', '<')
#define ACS_RARROW ACS_PICK('+', '>')
@@ -508,8 +508,8 @@ extern "C"
#define ACS_LANTERN ACS_PICK('i', '*')
#define ACS_BLOCK ACS_PICK('0', '#')
- /* That goes double for these -- undocumented SysV symbols. Don't use
- them. */
+/* That goes double for these -- undocumented SysV symbols. Don't use
+ them. */
#define ACS_S3 ACS_PICK('p', '-')
#define ACS_S7 ACS_PICK('r', '-')
@@ -519,7 +519,7 @@ extern "C"
#define ACS_NEQUAL ACS_PICK('|', '+')
#define ACS_STERLING ACS_PICK('}', 'L')
- /* Box char aliases */
+/* Box char aliases */
#define ACS_BSSB ACS_ULCORNER
#define ACS_SSBB ACS_LLCORNER
@@ -533,7 +533,7 @@ extern "C"
#define ACS_SBSB ACS_VLINE
#define ACS_SSSS ACS_PLUS
- /* cchar_t aliases */
+/* cchar_t aliases */
#ifdef PDC_WIDE
# define WACS_ULCORNER (&(acs_map['l']))
@@ -585,7 +585,7 @@ extern "C"
# define WACS_SSSS WACS_PLUS
#endif
- /*** Color macros ***/
+/*** Color macros ***/
#define COLOR_BLACK 0
@@ -605,12 +605,12 @@ extern "C"
#define COLOR_WHITE 7
- /*----------------------------------------------------------------------
- *
- * Function and Keypad Key Definitions.
- * Many are just for compatibility.
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * Function and Keypad Key Definitions.
+ * Many are just for compatibility.
+ *
+ */
#define KEY_CODE_YES 0x100 /* If get_wch() gives a key code */
@@ -703,7 +703,7 @@ extern "C"
#define KEY_SUSPEND 0x195 /* suspend key */
#define KEY_UNDO 0x196 /* undo key */
- /* PDCurses-specific key definitions -- PC only */
+/* PDCurses-specific key definitions -- PC only */
#define ALT_0 0x197
#define ALT_1 0x198
@@ -861,479 +861,479 @@ extern "C"
#define KEY_F(n) (KEY_F0 + (n))
- /*----------------------------------------------------------------------
- *
- * PDCurses Function Declarations
- *
- */
-
- /* Standard */
-
- int addch ( const chtype );
- int addchnstr ( const chtype *, int );
- int addchstr ( const chtype * );
- int addnstr ( const char *, int );
- int addstr ( const char * );
- int attroff ( chtype );
- int attron ( chtype );
- int attrset ( chtype );
- int attr_get ( attr_t *, short *, void * );
- int attr_off ( attr_t, void * );
- int attr_on ( attr_t, void * );
- int attr_set ( attr_t, short, void * );
- int baudrate ( void );
- int beep ( void );
- int bkgd ( chtype );
- void bkgdset ( chtype );
- int border ( chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype );
- int box ( WINDOW *, chtype, chtype );
- bool can_change_color ( void );
- int cbreak ( void );
- int chgat ( int, attr_t, short, const void * );
- int clearok ( WINDOW *, bool );
- int clear ( void );
- int clrtobot ( void );
- int clrtoeol ( void );
- int color_content ( short, short *, short *, short * );
- int color_set ( short, void * );
- int copywin ( const WINDOW *, WINDOW *, int, int, int, int, int, int, int );
- int curs_set ( int );
- int def_prog_mode ( void );
- int def_shell_mode ( void );
- int delay_output ( int );
- int delch ( void );
- int deleteln ( void );
- void delscreen ( SCREEN * );
- int delwin ( WINDOW * );
- WINDOW * derwin ( WINDOW *, int, int, int, int );
- int doupdate ( void );
- WINDOW * dupwin ( WINDOW * );
- int echochar ( const chtype );
- int echo ( void );
- int endwin ( void );
- char erasechar ( void );
- int erase ( void );
- void filter ( void );
- int flash ( void );
- int flushinp ( void );
- chtype getbkgd ( WINDOW * );
- int getnstr ( char *, int );
- int getstr ( char * );
- WINDOW * getwin ( FILE * );
- int halfdelay ( int );
- bool has_colors ( void );
- bool has_ic ( void );
- bool has_il ( void );
- int hline ( chtype, int );
- void idcok ( WINDOW *, bool );
- int idlok ( WINDOW *, bool );
- void immedok ( WINDOW *, bool );
- int inchnstr ( chtype *, int );
- int inchstr ( chtype * );
- chtype inch ( void );
- int init_color ( short, short, short, short );
- int init_pair ( short, short, short );
- WINDOW * initscr ( void );
- int innstr ( char *, int );
- int insch ( chtype );
- int insdelln ( int );
- int insertln ( void );
- int insnstr ( const char *, int );
- int insstr ( const char * );
- int instr ( char * );
- int intrflush ( WINDOW *, bool );
- bool isendwin ( void );
- bool is_linetouched ( WINDOW *, int );
- bool is_wintouched ( WINDOW * );
- char * keyname ( int );
- int keypad ( WINDOW *, bool );
- char killchar ( void );
- int leaveok ( WINDOW *, bool );
- char * longname ( void );
- int meta ( WINDOW *, bool );
- int move ( int, int );
- int mvaddch ( int, int, const chtype );
- int mvaddchnstr ( int, int, const chtype *, int );
- int mvaddchstr ( int, int, const chtype * );
- int mvaddnstr ( int, int, const char *, int );
- int mvaddstr ( int, int, const char * );
- int mvchgat ( int, int, int, attr_t, short, const void * );
- int mvcur ( int, int, int, int );
- int mvdelch ( int, int );
- int mvderwin ( WINDOW *, int, int );
- int mvgetch ( int, int );
- int mvgetnstr ( int, int, char *, int );
- int mvgetstr ( int, int, char * );
- int mvhline ( int, int, chtype, int );
- chtype mvinch ( int, int );
- int mvinchnstr ( int, int, chtype *, int );
- int mvinchstr ( int, int, chtype * );
- int mvinnstr ( int, int, char *, int );
- int mvinsch ( int, int, chtype );
- int mvinsnstr ( int, int, const char *, int );
- int mvinsstr ( int, int, const char * );
- int mvinstr ( int, int, char * );
- int mvprintw ( int, int, const char *, ... );
- int mvscanw ( int, int, const char *, ... );
- int mvvline ( int, int, chtype, int );
- int mvwaddchnstr ( WINDOW *, int, int, const chtype *, int );
- int mvwaddchstr ( WINDOW *, int, int, const chtype * );
- int mvwaddch ( WINDOW *, int, int, const chtype );
- int mvwaddnstr ( WINDOW *, int, int, const char *, int );
- int mvwaddstr ( WINDOW *, int, int, const char * );
- int mvwchgat ( WINDOW *, int, int, int, attr_t, short, const void * );
- int mvwdelch ( WINDOW *, int, int );
- int mvwgetch ( WINDOW *, int, int );
- int mvwgetnstr ( WINDOW *, int, int, char *, int );
- int mvwgetstr ( WINDOW *, int, int, char * );
- int mvwhline ( WINDOW *, int, int, chtype, int );
- int mvwinchnstr ( WINDOW *, int, int, chtype *, int );
- int mvwinchstr ( WINDOW *, int, int, chtype * );
- chtype mvwinch ( WINDOW *, int, int );
- int mvwinnstr ( WINDOW *, int, int, char *, int );
- int mvwinsch ( WINDOW *, int, int, chtype );
- int mvwinsnstr ( WINDOW *, int, int, const char *, int );
- int mvwinsstr ( WINDOW *, int, int, const char * );
- int mvwinstr ( WINDOW *, int, int, char * );
- int mvwin ( WINDOW *, int, int );
- int mvwprintw ( WINDOW *, int, int, const char *, ... );
- int mvwscanw ( WINDOW *, int, int, const char *, ... );
- int mvwvline ( WINDOW *, int, int, chtype, int );
- int napms ( int );
- WINDOW * newpad ( int, int );
- SCREEN * newterm ( const char *, FILE *, FILE * );
- WINDOW * newwin ( int, int, int, int );
- int nl ( void );
- int nocbreak ( void );
- int nodelay ( WINDOW *, bool );
- int noecho ( void );
- int nonl ( void );
- void noqiflush ( void );
- int noraw ( void );
- int notimeout ( WINDOW *, bool );
- int overlay ( const WINDOW *, WINDOW * );
- int overwrite ( const WINDOW *, WINDOW * );
- int pair_content ( short, short *, short * );
- int pechochar ( WINDOW *, chtype );
- int pnoutrefresh ( WINDOW *, int, int, int, int, int, int );
- int prefresh ( WINDOW *, int, int, int, int, int, int );
- int printw ( const char *, ... );
- int putwin ( WINDOW *, FILE * );
- void qiflush ( void );
- int raw ( void );
- int redrawwin ( WINDOW * );
- int refresh ( void );
- int reset_prog_mode ( void );
- int reset_shell_mode ( void );
- int resetty ( void );
- int ripoffline ( int, int ( * ) ( WINDOW *, int ) );
- int savetty ( void );
- int scanw ( const char *, ... );
- int scr_dump ( const char * );
- int scr_init ( const char * );
- int scr_restore ( const char * );
- int scr_set ( const char * );
- int scrl ( int );
- int scroll ( WINDOW * );
- int scrollok ( WINDOW *, bool );
- SCREEN * set_term ( SCREEN * );
- int setscrreg ( int, int );
- int slk_attroff ( const chtype );
- int slk_attr_off ( const attr_t, void * );
- int slk_attron ( const chtype );
- int slk_attr_on ( const attr_t, void * );
- int slk_attrset ( const chtype );
- int slk_attr_set ( const attr_t, short, void * );
- int slk_clear ( void );
- int slk_color ( short );
- int slk_init ( int );
- char * slk_label ( int );
- int slk_noutrefresh ( void );
- int slk_refresh ( void );
- int slk_restore ( void );
- int slk_set ( int, const char *, int );
- int slk_touch ( void );
- int standend ( void );
- int standout ( void );
- int start_color ( void );
- WINDOW * subpad ( WINDOW *, int, int, int, int );
- WINDOW * subwin ( WINDOW *, int, int, int, int );
- int syncok ( WINDOW *, bool );
- chtype termattrs ( void );
- attr_t term_attrs ( void );
- char * termname ( void );
- void timeout ( int );
- int touchline ( WINDOW *, int, int );
- int touchwin ( WINDOW * );
- int typeahead ( int );
- int untouchwin ( WINDOW * );
- void use_env ( bool );
- int vidattr ( chtype );
- int vid_attr ( attr_t, short, void * );
- int vidputs ( chtype, int ( * ) ( int ) );
- int vid_puts ( attr_t, short, void *, int ( * ) ( int ) );
- int vline ( chtype, int );
- int vw_printw ( WINDOW *, const char *, va_list );
- int vwprintw ( WINDOW *, const char *, va_list );
- int vw_scanw ( WINDOW *, const char *, va_list );
- int vwscanw ( WINDOW *, const char *, va_list );
- int waddchnstr ( WINDOW *, const chtype *, int );
- int waddchstr ( WINDOW *, const chtype * );
- int waddch ( WINDOW *, const chtype );
- int waddnstr ( WINDOW *, const char *, int );
- int waddstr ( WINDOW *, const char * );
- int wattroff ( WINDOW *, chtype );
- int wattron ( WINDOW *, chtype );
- int wattrset ( WINDOW *, chtype );
- int wattr_get ( WINDOW *, attr_t *, short *, void * );
- int wattr_off ( WINDOW *, attr_t, void * );
- int wattr_on ( WINDOW *, attr_t, void * );
- int wattr_set ( WINDOW *, attr_t, short, void * );
- void wbkgdset ( WINDOW *, chtype );
- int wbkgd ( WINDOW *, chtype );
- int wborder ( WINDOW *, chtype, chtype, chtype, chtype,
- chtype, chtype, chtype, chtype );
- int wchgat ( WINDOW *, int, attr_t, short, const void * );
- int wclear ( WINDOW * );
- int wclrtobot ( WINDOW * );
- int wclrtoeol ( WINDOW * );
- int wcolor_set ( WINDOW *, short, void * );
- void wcursyncup ( WINDOW * );
- int wdelch ( WINDOW * );
- int wdeleteln ( WINDOW * );
- int wechochar ( WINDOW *, const chtype );
- int werase ( WINDOW * );
- int wgetch ( WINDOW * );
- int wgetnstr ( WINDOW *, char *, int );
- int wgetstr ( WINDOW *, char * );
- int whline ( WINDOW *, chtype, int );
- int winchnstr ( WINDOW *, chtype *, int );
- int winchstr ( WINDOW *, chtype * );
- chtype winch ( WINDOW * );
- int winnstr ( WINDOW *, char *, int );
- int winsch ( WINDOW *, chtype );
- int winsdelln ( WINDOW *, int );
- int winsertln ( WINDOW * );
- int winsnstr ( WINDOW *, const char *, int );
- int winsstr ( WINDOW *, const char * );
- int winstr ( WINDOW *, char * );
- int wmove ( WINDOW *, int, int );
- int wnoutrefresh ( WINDOW * );
- int wprintw ( WINDOW *, const char *, ... );
- int wredrawln ( WINDOW *, int, int );
- int wrefresh ( WINDOW * );
- int wscanw ( WINDOW *, const char *, ... );
- int wscrl ( WINDOW *, int );
- int wsetscrreg ( WINDOW *, int, int );
- int wstandend ( WINDOW * );
- int wstandout ( WINDOW * );
- void wsyncdown ( WINDOW * );
- void wsyncup ( WINDOW * );
- void wtimeout ( WINDOW *, int );
- int wtouchln ( WINDOW *, int, int, int );
- int wvline ( WINDOW *, chtype, int );
-
- /* Wide-character functions */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Function Declarations
+ *
+ */
+
+/* Standard */
+
+int addch ( const chtype );
+int addchnstr ( const chtype *, int );
+int addchstr ( const chtype * );
+int addnstr ( const char *, int );
+int addstr ( const char * );
+int attroff ( chtype );
+int attron ( chtype );
+int attrset ( chtype );
+int attr_get ( attr_t *, short *, void * );
+int attr_off ( attr_t, void * );
+int attr_on ( attr_t, void * );
+int attr_set ( attr_t, short, void * );
+int baudrate ( void );
+int beep ( void );
+int bkgd ( chtype );
+void bkgdset ( chtype );
+int border ( chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype );
+int box ( WINDOW *, chtype, chtype );
+bool can_change_color ( void );
+int cbreak ( void );
+int chgat ( int, attr_t, short, const void * );
+int clearok ( WINDOW *, bool );
+int clear ( void );
+int clrtobot ( void );
+int clrtoeol ( void );
+int color_content ( short, short *, short *, short * );
+int color_set ( short, void * );
+int copywin ( const WINDOW *, WINDOW *, int, int, int, int, int, int, int );
+int curs_set ( int );
+int def_prog_mode ( void );
+int def_shell_mode ( void );
+int delay_output ( int );
+int delch ( void );
+int deleteln ( void );
+void delscreen ( SCREEN * );
+int delwin ( WINDOW * );
+WINDOW *derwin ( WINDOW *, int, int, int, int );
+int doupdate ( void );
+WINDOW *dupwin ( WINDOW * );
+int echochar ( const chtype );
+int echo ( void );
+int endwin ( void );
+char erasechar ( void );
+int erase ( void );
+void filter ( void );
+int flash ( void );
+int flushinp ( void );
+chtype getbkgd ( WINDOW * );
+int getnstr ( char *, int );
+int getstr ( char * );
+WINDOW *getwin ( FILE * );
+int halfdelay ( int );
+bool has_colors ( void );
+bool has_ic ( void );
+bool has_il ( void );
+int hline ( chtype, int );
+void idcok ( WINDOW *, bool );
+int idlok ( WINDOW *, bool );
+void immedok ( WINDOW *, bool );
+int inchnstr ( chtype *, int );
+int inchstr ( chtype * );
+chtype inch ( void );
+int init_color ( short, short, short, short );
+int init_pair ( short, short, short );
+WINDOW *initscr ( void );
+int innstr ( char *, int );
+int insch ( chtype );
+int insdelln ( int );
+int insertln ( void );
+int insnstr ( const char *, int );
+int insstr ( const char * );
+int instr ( char * );
+int intrflush ( WINDOW *, bool );
+bool isendwin ( void );
+bool is_linetouched ( WINDOW *, int );
+bool is_wintouched ( WINDOW * );
+char *keyname ( int );
+int keypad ( WINDOW *, bool );
+char killchar ( void );
+int leaveok ( WINDOW *, bool );
+char *longname ( void );
+int meta ( WINDOW *, bool );
+int move ( int, int );
+int mvaddch ( int, int, const chtype );
+int mvaddchnstr ( int, int, const chtype *, int );
+int mvaddchstr ( int, int, const chtype * );
+int mvaddnstr ( int, int, const char *, int );
+int mvaddstr ( int, int, const char * );
+int mvchgat ( int, int, int, attr_t, short, const void * );
+int mvcur ( int, int, int, int );
+int mvdelch ( int, int );
+int mvderwin ( WINDOW *, int, int );
+int mvgetch ( int, int );
+int mvgetnstr ( int, int, char *, int );
+int mvgetstr ( int, int, char * );
+int mvhline ( int, int, chtype, int );
+chtype mvinch ( int, int );
+int mvinchnstr ( int, int, chtype *, int );
+int mvinchstr ( int, int, chtype * );
+int mvinnstr ( int, int, char *, int );
+int mvinsch ( int, int, chtype );
+int mvinsnstr ( int, int, const char *, int );
+int mvinsstr ( int, int, const char * );
+int mvinstr ( int, int, char * );
+int mvprintw ( int, int, const char *, ... );
+int mvscanw ( int, int, const char *, ... );
+int mvvline ( int, int, chtype, int );
+int mvwaddchnstr ( WINDOW *, int, int, const chtype *, int );
+int mvwaddchstr ( WINDOW *, int, int, const chtype * );
+int mvwaddch ( WINDOW *, int, int, const chtype );
+int mvwaddnstr ( WINDOW *, int, int, const char *, int );
+int mvwaddstr ( WINDOW *, int, int, const char * );
+int mvwchgat ( WINDOW *, int, int, int, attr_t, short, const void * );
+int mvwdelch ( WINDOW *, int, int );
+int mvwgetch ( WINDOW *, int, int );
+int mvwgetnstr ( WINDOW *, int, int, char *, int );
+int mvwgetstr ( WINDOW *, int, int, char * );
+int mvwhline ( WINDOW *, int, int, chtype, int );
+int mvwinchnstr ( WINDOW *, int, int, chtype *, int );
+int mvwinchstr ( WINDOW *, int, int, chtype * );
+chtype mvwinch ( WINDOW *, int, int );
+int mvwinnstr ( WINDOW *, int, int, char *, int );
+int mvwinsch ( WINDOW *, int, int, chtype );
+int mvwinsnstr ( WINDOW *, int, int, const char *, int );
+int mvwinsstr ( WINDOW *, int, int, const char * );
+int mvwinstr ( WINDOW *, int, int, char * );
+int mvwin ( WINDOW *, int, int );
+int mvwprintw ( WINDOW *, int, int, const char *, ... );
+int mvwscanw ( WINDOW *, int, int, const char *, ... );
+int mvwvline ( WINDOW *, int, int, chtype, int );
+int napms ( int );
+WINDOW *newpad ( int, int );
+SCREEN *newterm ( const char *, FILE *, FILE * );
+WINDOW *newwin ( int, int, int, int );
+int nl ( void );
+int nocbreak ( void );
+int nodelay ( WINDOW *, bool );
+int noecho ( void );
+int nonl ( void );
+void noqiflush ( void );
+int noraw ( void );
+int notimeout ( WINDOW *, bool );
+int overlay ( const WINDOW *, WINDOW * );
+int overwrite ( const WINDOW *, WINDOW * );
+int pair_content ( short, short *, short * );
+int pechochar ( WINDOW *, chtype );
+int pnoutrefresh ( WINDOW *, int, int, int, int, int, int );
+int prefresh ( WINDOW *, int, int, int, int, int, int );
+int printw ( const char *, ... );
+int putwin ( WINDOW *, FILE * );
+void qiflush ( void );
+int raw ( void );
+int redrawwin ( WINDOW * );
+int refresh ( void );
+int reset_prog_mode ( void );
+int reset_shell_mode ( void );
+int resetty ( void );
+int ripoffline ( int, int ( * ) ( WINDOW *, int ) );
+int savetty ( void );
+int scanw ( const char *, ... );
+int scr_dump ( const char * );
+int scr_init ( const char * );
+int scr_restore ( const char * );
+int scr_set ( const char * );
+int scrl ( int );
+int scroll ( WINDOW * );
+int scrollok ( WINDOW *, bool );
+SCREEN *set_term ( SCREEN * );
+int setscrreg ( int, int );
+int slk_attroff ( const chtype );
+int slk_attr_off ( const attr_t, void * );
+int slk_attron ( const chtype );
+int slk_attr_on ( const attr_t, void * );
+int slk_attrset ( const chtype );
+int slk_attr_set ( const attr_t, short, void * );
+int slk_clear ( void );
+int slk_color ( short );
+int slk_init ( int );
+char *slk_label ( int );
+int slk_noutrefresh ( void );
+int slk_refresh ( void );
+int slk_restore ( void );
+int slk_set ( int, const char *, int );
+int slk_touch ( void );
+int standend ( void );
+int standout ( void );
+int start_color ( void );
+WINDOW *subpad ( WINDOW *, int, int, int, int );
+WINDOW *subwin ( WINDOW *, int, int, int, int );
+int syncok ( WINDOW *, bool );
+chtype termattrs ( void );
+attr_t term_attrs ( void );
+char *termname ( void );
+void timeout ( int );
+int touchline ( WINDOW *, int, int );
+int touchwin ( WINDOW * );
+int typeahead ( int );
+int untouchwin ( WINDOW * );
+void use_env ( bool );
+int vidattr ( chtype );
+int vid_attr ( attr_t, short, void * );
+int vidputs ( chtype, int ( * ) ( int ) );
+int vid_puts ( attr_t, short, void *, int ( * ) ( int ) );
+int vline ( chtype, int );
+int vw_printw ( WINDOW *, const char *, va_list );
+int vwprintw ( WINDOW *, const char *, va_list );
+int vw_scanw ( WINDOW *, const char *, va_list );
+int vwscanw ( WINDOW *, const char *, va_list );
+int waddchnstr ( WINDOW *, const chtype *, int );
+int waddchstr ( WINDOW *, const chtype * );
+int waddch ( WINDOW *, const chtype );
+int waddnstr ( WINDOW *, const char *, int );
+int waddstr ( WINDOW *, const char * );
+int wattroff ( WINDOW *, chtype );
+int wattron ( WINDOW *, chtype );
+int wattrset ( WINDOW *, chtype );
+int wattr_get ( WINDOW *, attr_t *, short *, void * );
+int wattr_off ( WINDOW *, attr_t, void * );
+int wattr_on ( WINDOW *, attr_t, void * );
+int wattr_set ( WINDOW *, attr_t, short, void * );
+void wbkgdset ( WINDOW *, chtype );
+int wbkgd ( WINDOW *, chtype );
+int wborder ( WINDOW *, chtype, chtype, chtype, chtype,
+ chtype, chtype, chtype, chtype );
+int wchgat ( WINDOW *, int, attr_t, short, const void * );
+int wclear ( WINDOW * );
+int wclrtobot ( WINDOW * );
+int wclrtoeol ( WINDOW * );
+int wcolor_set ( WINDOW *, short, void * );
+void wcursyncup ( WINDOW * );
+int wdelch ( WINDOW * );
+int wdeleteln ( WINDOW * );
+int wechochar ( WINDOW *, const chtype );
+int werase ( WINDOW * );
+int wgetch ( WINDOW * );
+int wgetnstr ( WINDOW *, char *, int );
+int wgetstr ( WINDOW *, char * );
+int whline ( WINDOW *, chtype, int );
+int winchnstr ( WINDOW *, chtype *, int );
+int winchstr ( WINDOW *, chtype * );
+chtype winch ( WINDOW * );
+int winnstr ( WINDOW *, char *, int );
+int winsch ( WINDOW *, chtype );
+int winsdelln ( WINDOW *, int );
+int winsertln ( WINDOW * );
+int winsnstr ( WINDOW *, const char *, int );
+int winsstr ( WINDOW *, const char * );
+int winstr ( WINDOW *, char * );
+int wmove ( WINDOW *, int, int );
+int wnoutrefresh ( WINDOW * );
+int wprintw ( WINDOW *, const char *, ... );
+int wredrawln ( WINDOW *, int, int );
+int wrefresh ( WINDOW * );
+int wscanw ( WINDOW *, const char *, ... );
+int wscrl ( WINDOW *, int );
+int wsetscrreg ( WINDOW *, int, int );
+int wstandend ( WINDOW * );
+int wstandout ( WINDOW * );
+void wsyncdown ( WINDOW * );
+void wsyncup ( WINDOW * );
+void wtimeout ( WINDOW *, int );
+int wtouchln ( WINDOW *, int, int, int );
+int wvline ( WINDOW *, chtype, int );
+
+/* Wide-character functions */
#ifdef PDC_WIDE
- int addnwstr ( const wchar_t *, int );
- int addwstr ( const wchar_t * );
- int add_wch ( const cchar_t * );
- int add_wchnstr ( const cchar_t *, int );
- int add_wchstr ( const cchar_t * );
- int border_set ( const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t * );
- int box_set ( WINDOW *, const cchar_t *, const cchar_t * );
- int echo_wchar ( const cchar_t * );
- int erasewchar ( wchar_t * );
- int getbkgrnd ( cchar_t * );
- int getcchar ( const cchar_t *, wchar_t *, attr_t *, short *, void * );
- int getn_wstr ( wint_t *, int );
- int get_wch ( wint_t * );
- int get_wstr ( wint_t * );
- int hline_set ( const cchar_t *, int );
- int innwstr ( wchar_t *, int );
- int ins_nwstr ( const wchar_t *, int );
- int ins_wch ( const cchar_t * );
- int ins_wstr ( const wchar_t * );
- int inwstr ( wchar_t * );
- int in_wch ( cchar_t * );
- int in_wchnstr ( cchar_t *, int );
- int in_wchstr ( cchar_t * );
- char * key_name ( wchar_t );
- int killwchar ( wchar_t * );
- int mvaddnwstr ( int, int, const wchar_t *, int );
- int mvaddwstr ( int, int, const wchar_t * );
- int mvadd_wch ( int, int, const cchar_t * );
- int mvadd_wchnstr ( int, int, const cchar_t *, int );
- int mvadd_wchstr ( int, int, const cchar_t * );
- int mvgetn_wstr ( int, int, wint_t *, int );
- int mvget_wch ( int, int, wint_t * );
- int mvget_wstr ( int, int, wint_t * );
- int mvhline_set ( int, int, const cchar_t *, int );
- int mvinnwstr ( int, int, wchar_t *, int );
- int mvins_nwstr ( int, int, const wchar_t *, int );
- int mvins_wch ( int, int, const cchar_t * );
- int mvins_wstr ( int, int, const wchar_t * );
- int mvinwstr ( int, int, wchar_t * );
- int mvin_wch ( int, int, cchar_t * );
- int mvin_wchnstr ( int, int, cchar_t *, int );
- int mvin_wchstr ( int, int, cchar_t * );
- int mvvline_set ( int, int, const cchar_t *, int );
- int mvwaddnwstr ( WINDOW *, int, int, const wchar_t *, int );
- int mvwaddwstr ( WINDOW *, int, int, const wchar_t * );
- int mvwadd_wch ( WINDOW *, int, int, const cchar_t * );
- int mvwadd_wchnstr ( WINDOW *, int, int, const cchar_t *, int );
- int mvwadd_wchstr ( WINDOW *, int, int, const cchar_t * );
- int mvwgetn_wstr ( WINDOW *, int, int, wint_t *, int );
- int mvwget_wch ( WINDOW *, int, int, wint_t * );
- int mvwget_wstr ( WINDOW *, int, int, wint_t * );
- int mvwhline_set ( WINDOW *, int, int, const cchar_t *, int );
- int mvwinnwstr ( WINDOW *, int, int, wchar_t *, int );
- int mvwins_nwstr ( WINDOW *, int, int, const wchar_t *, int );
- int mvwins_wch ( WINDOW *, int, int, const cchar_t * );
- int mvwins_wstr ( WINDOW *, int, int, const wchar_t * );
- int mvwin_wch ( WINDOW *, int, int, cchar_t * );
- int mvwin_wchnstr ( WINDOW *, int, int, cchar_t *, int );
- int mvwin_wchstr ( WINDOW *, int, int, cchar_t * );
- int mvwinwstr ( WINDOW *, int, int, wchar_t * );
- int mvwvline_set ( WINDOW *, int, int, const cchar_t *, int );
- int pecho_wchar ( WINDOW *, const cchar_t * );
- int setcchar ( cchar_t *, const wchar_t *, const attr_t, short, const void * );
- int slk_wset ( int, const wchar_t *, int );
- int unget_wch ( const wchar_t );
- int vline_set ( const cchar_t *, int );
- int waddnwstr ( WINDOW *, const wchar_t *, int );
- int waddwstr ( WINDOW *, const wchar_t * );
- int wadd_wch ( WINDOW *, const cchar_t * );
- int wadd_wchnstr ( WINDOW *, const cchar_t *, int );
- int wadd_wchstr ( WINDOW *, const cchar_t * );
- int wbkgrnd ( WINDOW *, const cchar_t * );
- void wbkgrndset ( WINDOW *, const cchar_t * );
- int wborder_set ( WINDOW *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t * );
- int wecho_wchar ( WINDOW *, const cchar_t * );
- int wgetbkgrnd ( WINDOW *, cchar_t * );
- int wgetn_wstr ( WINDOW *, wint_t *, int );
- int wget_wch ( WINDOW *, wint_t * );
- int wget_wstr ( WINDOW *, wint_t * );
- int whline_set ( WINDOW *, const cchar_t *, int );
- int winnwstr ( WINDOW *, wchar_t *, int );
- int wins_nwstr ( WINDOW *, const wchar_t *, int );
- int wins_wch ( WINDOW *, const cchar_t * );
- int wins_wstr ( WINDOW *, const wchar_t * );
- int winwstr ( WINDOW *, wchar_t * );
- int win_wch ( WINDOW *, cchar_t * );
- int win_wchnstr ( WINDOW *, cchar_t *, int );
- int win_wchstr ( WINDOW *, cchar_t * );
- wchar_t * wunctrl ( cchar_t * );
- int wvline_set ( WINDOW *, const cchar_t *, int );
+int addnwstr ( const wchar_t *, int );
+int addwstr ( const wchar_t * );
+int add_wch ( const cchar_t * );
+int add_wchnstr ( const cchar_t *, int );
+int add_wchstr ( const cchar_t * );
+int border_set ( const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t * );
+int box_set ( WINDOW *, const cchar_t *, const cchar_t * );
+int echo_wchar ( const cchar_t * );
+int erasewchar ( wchar_t * );
+int getbkgrnd ( cchar_t * );
+int getcchar ( const cchar_t *, wchar_t *, attr_t *, short *, void * );
+int getn_wstr ( wint_t *, int );
+int get_wch ( wint_t * );
+int get_wstr ( wint_t * );
+int hline_set ( const cchar_t *, int );
+int innwstr ( wchar_t *, int );
+int ins_nwstr ( const wchar_t *, int );
+int ins_wch ( const cchar_t * );
+int ins_wstr ( const wchar_t * );
+int inwstr ( wchar_t * );
+int in_wch ( cchar_t * );
+int in_wchnstr ( cchar_t *, int );
+int in_wchstr ( cchar_t * );
+char *key_name ( wchar_t );
+int killwchar ( wchar_t * );
+int mvaddnwstr ( int, int, const wchar_t *, int );
+int mvaddwstr ( int, int, const wchar_t * );
+int mvadd_wch ( int, int, const cchar_t * );
+int mvadd_wchnstr ( int, int, const cchar_t *, int );
+int mvadd_wchstr ( int, int, const cchar_t * );
+int mvgetn_wstr ( int, int, wint_t *, int );
+int mvget_wch ( int, int, wint_t * );
+int mvget_wstr ( int, int, wint_t * );
+int mvhline_set ( int, int, const cchar_t *, int );
+int mvinnwstr ( int, int, wchar_t *, int );
+int mvins_nwstr ( int, int, const wchar_t *, int );
+int mvins_wch ( int, int, const cchar_t * );
+int mvins_wstr ( int, int, const wchar_t * );
+int mvinwstr ( int, int, wchar_t * );
+int mvin_wch ( int, int, cchar_t * );
+int mvin_wchnstr ( int, int, cchar_t *, int );
+int mvin_wchstr ( int, int, cchar_t * );
+int mvvline_set ( int, int, const cchar_t *, int );
+int mvwaddnwstr ( WINDOW *, int, int, const wchar_t *, int );
+int mvwaddwstr ( WINDOW *, int, int, const wchar_t * );
+int mvwadd_wch ( WINDOW *, int, int, const cchar_t * );
+int mvwadd_wchnstr ( WINDOW *, int, int, const cchar_t *, int );
+int mvwadd_wchstr ( WINDOW *, int, int, const cchar_t * );
+int mvwgetn_wstr ( WINDOW *, int, int, wint_t *, int );
+int mvwget_wch ( WINDOW *, int, int, wint_t * );
+int mvwget_wstr ( WINDOW *, int, int, wint_t * );
+int mvwhline_set ( WINDOW *, int, int, const cchar_t *, int );
+int mvwinnwstr ( WINDOW *, int, int, wchar_t *, int );
+int mvwins_nwstr ( WINDOW *, int, int, const wchar_t *, int );
+int mvwins_wch ( WINDOW *, int, int, const cchar_t * );
+int mvwins_wstr ( WINDOW *, int, int, const wchar_t * );
+int mvwin_wch ( WINDOW *, int, int, cchar_t * );
+int mvwin_wchnstr ( WINDOW *, int, int, cchar_t *, int );
+int mvwin_wchstr ( WINDOW *, int, int, cchar_t * );
+int mvwinwstr ( WINDOW *, int, int, wchar_t * );
+int mvwvline_set ( WINDOW *, int, int, const cchar_t *, int );
+int pecho_wchar ( WINDOW *, const cchar_t * );
+int setcchar ( cchar_t *, const wchar_t *, const attr_t, short, const void * );
+int slk_wset ( int, const wchar_t *, int );
+int unget_wch ( const wchar_t );
+int vline_set ( const cchar_t *, int );
+int waddnwstr ( WINDOW *, const wchar_t *, int );
+int waddwstr ( WINDOW *, const wchar_t * );
+int wadd_wch ( WINDOW *, const cchar_t * );
+int wadd_wchnstr ( WINDOW *, const cchar_t *, int );
+int wadd_wchstr ( WINDOW *, const cchar_t * );
+int wbkgrnd ( WINDOW *, const cchar_t * );
+void wbkgrndset ( WINDOW *, const cchar_t * );
+int wborder_set ( WINDOW *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t * );
+int wecho_wchar ( WINDOW *, const cchar_t * );
+int wgetbkgrnd ( WINDOW *, cchar_t * );
+int wgetn_wstr ( WINDOW *, wint_t *, int );
+int wget_wch ( WINDOW *, wint_t * );
+int wget_wstr ( WINDOW *, wint_t * );
+int whline_set ( WINDOW *, const cchar_t *, int );
+int winnwstr ( WINDOW *, wchar_t *, int );
+int wins_nwstr ( WINDOW *, const wchar_t *, int );
+int wins_wch ( WINDOW *, const cchar_t * );
+int wins_wstr ( WINDOW *, const wchar_t * );
+int winwstr ( WINDOW *, wchar_t * );
+int win_wch ( WINDOW *, cchar_t * );
+int win_wchnstr ( WINDOW *, cchar_t *, int );
+int win_wchstr ( WINDOW *, cchar_t * );
+wchar_t *wunctrl ( cchar_t * );
+int wvline_set ( WINDOW *, const cchar_t *, int );
#endif
- /* Quasi-standard */
-
- chtype getattrs ( WINDOW * );
- int getbegx ( WINDOW * );
- int getbegy ( WINDOW * );
- int getmaxx ( WINDOW * );
- int getmaxy ( WINDOW * );
- int getparx ( WINDOW * );
- int getpary ( WINDOW * );
- int getcurx ( WINDOW * );
- int getcury ( WINDOW * );
- void traceoff ( void );
- void traceon ( void );
- char * unctrl ( chtype );
-
- int crmode ( void );
- int nocrmode ( void );
- int draino ( int );
- int resetterm ( void );
- int fixterm ( void );
- int saveterm ( void );
- int setsyx ( int, int );
-
- int mouse_set ( unsigned long );
- int mouse_on ( unsigned long );
- int mouse_off ( unsigned long );
- int request_mouse_pos ( void );
- int map_button ( unsigned long );
- void wmouse_position ( WINDOW *, int *, int * );
- unsigned long getmouse ( void );
- unsigned long getbmap ( void );
-
- /* ncurses */
-
- int assume_default_colors ( int, int );
- const char * curses_version ( void );
- bool has_key ( int );
- int use_default_colors ( void );
- int wresize ( WINDOW *, int, int );
-
- int mouseinterval ( int );
- mmask_t mousemask ( mmask_t, mmask_t * );
- bool mouse_trafo ( int *, int *, bool );
- int nc_getmouse ( MEVENT * );
- int ungetmouse ( MEVENT * );
- bool wenclose ( const WINDOW *, int, int );
- bool wmouse_trafo ( const WINDOW *, int *, int *, bool );
-
- /* PDCurses */
-
- int addrawch ( chtype );
- int insrawch ( chtype );
- bool is_termresized ( void );
- int mvaddrawch ( int, int, chtype );
- int mvdeleteln ( int, int );
- int mvinsertln ( int, int );
- int mvinsrawch ( int, int, chtype );
- int mvwaddrawch ( WINDOW *, int, int, chtype );
- int mvwdeleteln ( WINDOW *, int, int );
- int mvwinsertln ( WINDOW *, int, int );
- int mvwinsrawch ( WINDOW *, int, int, chtype );
- int raw_output ( bool );
- int resize_term ( int, int );
- WINDOW * resize_window ( WINDOW *, int, int );
- int waddrawch ( WINDOW *, chtype );
- int winsrawch ( WINDOW *, chtype );
- char wordchar ( void );
+/* Quasi-standard */
+
+chtype getattrs ( WINDOW * );
+int getbegx ( WINDOW * );
+int getbegy ( WINDOW * );
+int getmaxx ( WINDOW * );
+int getmaxy ( WINDOW * );
+int getparx ( WINDOW * );
+int getpary ( WINDOW * );
+int getcurx ( WINDOW * );
+int getcury ( WINDOW * );
+void traceoff ( void );
+void traceon ( void );
+char *unctrl ( chtype );
+
+int crmode ( void );
+int nocrmode ( void );
+int draino ( int );
+int resetterm ( void );
+int fixterm ( void );
+int saveterm ( void );
+int setsyx ( int, int );
+
+int mouse_set ( unsigned long );
+int mouse_on ( unsigned long );
+int mouse_off ( unsigned long );
+int request_mouse_pos ( void );
+int map_button ( unsigned long );
+void wmouse_position ( WINDOW *, int *, int * );
+unsigned long getmouse ( void );
+unsigned long getbmap ( void );
+
+/* ncurses */
+
+int assume_default_colors ( int, int );
+const char *curses_version ( void );
+bool has_key ( int );
+int use_default_colors ( void );
+int wresize ( WINDOW *, int, int );
+
+int mouseinterval ( int );
+mmask_t mousemask ( mmask_t, mmask_t * );
+bool mouse_trafo ( int *, int *, bool );
+int nc_getmouse ( MEVENT * );
+int ungetmouse ( MEVENT * );
+bool wenclose ( const WINDOW *, int, int );
+bool wmouse_trafo ( const WINDOW *, int *, int *, bool );
+
+/* PDCurses */
+
+int addrawch ( chtype );
+int insrawch ( chtype );
+bool is_termresized ( void );
+int mvaddrawch ( int, int, chtype );
+int mvdeleteln ( int, int );
+int mvinsertln ( int, int );
+int mvinsrawch ( int, int, chtype );
+int mvwaddrawch ( WINDOW *, int, int, chtype );
+int mvwdeleteln ( WINDOW *, int, int );
+int mvwinsertln ( WINDOW *, int, int );
+int mvwinsrawch ( WINDOW *, int, int, chtype );
+int raw_output ( bool );
+int resize_term ( int, int );
+WINDOW *resize_window ( WINDOW *, int, int );
+int waddrawch ( WINDOW *, chtype );
+int winsrawch ( WINDOW *, chtype );
+char wordchar ( void );
#ifdef PDC_WIDE
- wchar_t * slk_wlabel ( int );
+wchar_t *slk_wlabel ( int );
#endif
- void PDC_debug ( const char *, ... );
- int PDC_ungetch ( int );
- int PDC_set_blink ( bool );
- int PDC_set_line_color ( short );
- void PDC_set_title ( const char * );
+void PDC_debug ( const char *, ... );
+int PDC_ungetch ( int );
+int PDC_set_blink ( bool );
+int PDC_set_line_color ( short );
+void PDC_set_title ( const char * );
- int PDC_clearclipboard ( void );
- int PDC_freeclipboard ( char * );
- int PDC_getclipboard ( char **, long * );
- int PDC_setclipboard ( const char *, long );
+int PDC_clearclipboard ( void );
+int PDC_freeclipboard ( char * );
+int PDC_getclipboard ( char **, long * );
+int PDC_setclipboard ( const char *, long );
- unsigned long PDC_get_input_fd ( void );
- unsigned long PDC_get_key_modifiers ( void );
- int PDC_return_key_modifiers ( bool );
- int PDC_save_key_modifiers ( bool );
+unsigned long PDC_get_input_fd ( void );
+unsigned long PDC_get_key_modifiers ( void );
+int PDC_return_key_modifiers ( bool );
+int PDC_save_key_modifiers ( bool );
#ifdef XCURSES
- WINDOW * Xinitscr ( int, char ** );
- void XCursesExit ( void );
- int sb_init ( void );
- int sb_set_horz ( int, int, int );
- int sb_set_vert ( int, int, int );
- int sb_get_horz ( int *, int *, int * );
- int sb_get_vert ( int *, int *, int * );
- int sb_refresh ( void );
+WINDOW *Xinitscr ( int, char ** );
+void XCursesExit ( void );
+int sb_init ( void );
+int sb_set_horz ( int, int, int );
+int sb_set_vert ( int, int, int );
+int sb_get_horz ( int *, int *, int * );
+int sb_get_vert ( int *, int *, int * );
+int sb_refresh ( void );
#endif
- /*** Functions defined as macros ***/
+/*** Functions defined as macros ***/
- /* getch() and ungetch() conflict with some DOS libraries */
+/* getch() and ungetch() conflict with some DOS libraries */
#define getch() wgetch(stdscr)
#define ungetch(ch) PDC_ungetch(ch)
@@ -1341,7 +1341,7 @@ extern "C"
#define COLOR_PAIR(n) (((chtype)(n) << PDC_COLOR_SHIFT) & A_COLOR)
#define PAIR_NUMBER(n) (((n) & A_COLOR) >> PDC_COLOR_SHIFT)
- /* These will _only_ work as macros */
+/* These will _only_ work as macros */
#define getbegyx(w, y, x) (y = getbegy(w), x = getbegx(w))
#define getmaxyx(w, y, x) (y = getmaxy(w), x = getmaxx(w))
@@ -1349,20 +1349,20 @@ extern "C"
#define getyx(w, y, x) (y = getcury(w), x = getcurx(w))
#define getsyx(y, x) { if (curscr->_leaveit) (y)=(x)=-1; \
- else getyx(curscr,(y),(x)); }
+ else getyx(curscr,(y),(x)); }
#ifdef NCURSES_MOUSE_VERSION
# define getmouse(x) nc_getmouse(x)
#endif
- /* return codes from PDC_getclipboard() and PDC_setclipboard() calls */
+/* return codes from PDC_getclipboard() and PDC_setclipboard() calls */
#define PDC_CLIP_SUCCESS 0
#define PDC_CLIP_ACCESS_ERROR 1
#define PDC_CLIP_EMPTY 2
#define PDC_CLIP_MEMORY_ERROR 3
- /* PDCurses key modifier masks */
+/* PDCurses key modifier masks */
#define PDC_KEY_MODIFIER_SHIFT 1
#define PDC_KEY_MODIFIER_CONTROL 2
diff --git a/sparsePregraph/inc/xcurses.h.gch b/sparsePregraph/inc/xcurses.h.gch
deleted file mode 100644
index e01c34a..0000000
Binary files a/sparsePregraph/inc/xcurses.h.gch and /dev/null differ
diff --git a/sparsePregraph/io_func.cpp b/sparsePregraph/io_func.cpp
index 6832bce..d68b1ea 100644
--- a/sparsePregraph/io_func.cpp
+++ b/sparsePregraph/io_func.cpp
@@ -1,7 +1,7 @@
/*
* io_func.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -48,9 +48,9 @@
static int state = -3;
static int readstate = 0;
-static samfile_t * openFile4readb ( const char * fname );
+static samfile_t *openFile4readb ( const char *fname );
-void read1seqbam ( char * src_seq, char * src_name, int max_read_len, samfile_t * in, int * type );
+void read1seqbam ( char *src_seq, char *src_name, int max_read_len, samfile_t *in, int *type );
/*************************************************
@@ -66,57 +66,64 @@ Output:
Return:
None.
*************************************************/
-void filter_N ( string & seq_s, int & bad_flag )
+void filter_N ( string &seq_s, int &bad_flag )
{
- //global max_rd_len
- //global min_rd_len
- if ( seq_s.size() > max_rd_len )
- {
- max_rd_len = seq_s.size();
- }
-
- if ( seq_s.size() < min_rd_len )
- {
- min_rd_len = seq_s.size();
- }
-
- bad_flag = 0;
-
- if ( seq_s[seq_s.size() - 1] == '\n' || seq_s[seq_s.size() - 1] == '\r' )
- {
- seq_s.resize ( seq_s.size() - 1 );
- }
-
- int seq_sz = seq_s.size();
- int nN = seq_sz, isN = -1;
-
- for ( int i = 0; i < seq_sz; ++i )
- {
- if ( seq_s[i] == '-' || seq_s[i] == 'N' )
- {
- if ( i <= seq_sz / 2 )
- {
- isN = i;
- continue;
- }
- else
- {
- nN = i - 1;
- break;
- }
- }
- }
-
- if ( nN == seq_sz && isN == -1 ) {bad_flag = 0; return;}
-
- if ( ( nN - isN ) <= seq_sz / 2 )
- {
- bad_flag = 1;
- }
-
- if ( bad_flag == 1 ) { return; }
-
- seq_s = seq_s.substr ( isN + 1, nN - isN );
+ //global max_rd_len
+ //global min_rd_len
+ if ( seq_s.size() > max_rd_len )
+ {
+ max_rd_len = seq_s.size();
+ }
+
+ if ( seq_s.size() < min_rd_len )
+ {
+ min_rd_len = seq_s.size();
+ }
+
+ bad_flag = 0;
+
+ if ( seq_s[seq_s.size() - 1] == '\n' || seq_s[seq_s.size() - 1] == '\r' )
+ {
+ seq_s.resize ( seq_s.size() - 1 );
+ }
+
+ int seq_sz = seq_s.size();
+ int nN = seq_sz, isN = -1;
+
+ for ( int i = 0; i < seq_sz; ++i )
+ {
+ if ( seq_s[i] == '-' || seq_s[i] == 'N' )
+ {
+ if ( i <= seq_sz / 2 )
+ {
+ isN = i;
+ continue;
+ }
+ else
+ {
+ nN = i - 1;
+ break;
+ }
+ }
+ }
+
+ if ( nN == seq_sz && isN == -1 )
+ {
+ bad_flag = 0;
+ return;
+ }
+
+ if ( ( nN - isN ) <= seq_sz / 2 )
+ {
+ bad_flag = 1;
+ }
+
+ if ( bad_flag == 1 )
+ {
+ return;
+ }
+
+ seq_s = seq_s.substr ( isN + 1, nN - isN );
}
@@ -134,91 +141,94 @@ Output:
Return:
None.
*************************************************/
-void read_lib ( vector<string> &filenames, char * lib_file )
+void read_lib ( vector<string> &filenames, char *lib_file )
{
- ifstream lib_in ( lib_file );
- string str;
- int read_stat = 0; // 1: begin a lib, 2:asm_flags=1 or 3
- size_t found;
- int asm_flags;
-
- while ( getline ( lib_in, str ) )
- {
- if ( read_stat == 0 ) //not start a lib
- {
- found = str.find ( "[LIB]" );
-
- if ( found == string::npos )
- {
- continue;
- }
- else
- {
- read_stat = 1;
- asm_flags = 0;
- }
- }
- else if ( read_stat == 1 ) //start reading a lib fetch asm flags
- {
- //split by "="
- found = str.find ( "asm_flags" );
-
- if ( found == string::npos )
- {
- continue;
- }
- else
- {
- found = str.find ( "=" );
- str = str.substr ( found + 1, str.size() - found );
-
- if ( str.size() == 0 )
- {
- fprintf ( stderr, "ERROR: please check asm_flags in lib file\n" );
- exit ( -1 );
- }
-
- asm_flags = atoi ( str.c_str() );
-
- if ( asm_flags == 1 || asm_flags == 3 )
- {
- read_stat = 2;
- }
- else
- {
- read_stat = 0; // next lib
- }
- }
- }
- else if ( read_stat == 2 ) // reading file
- {
- found = str.find_first_of ( "fqpb" );
-
- if ( found == 0 ) //f1 f2 q1 q2 p b
- {
- found = str.find ( "=" );
-
- if ( found > 2 ) {continue;} // the "=" should be the second or thrid poistion
-
- str = str.substr ( found + 1, str.size() - found );
- filenames.push_back ( str );
- }
- else
- {
- found = str.find ( "[LIB]" );
-
- if ( found == string::npos )
- {
- continue;
- }
- else
- {
- read_stat = 1;
- asm_flags = 0;
- }
- }
- }
- }
+ ifstream lib_in ( lib_file );
+ string str;
+ int read_stat = 0; // 1: begin a lib, 2:asm_flags=1 or 3
+ size_t found;
+ int asm_flags;
+
+ while ( getline ( lib_in, str ) )
+ {
+ if ( read_stat == 0 ) //not start a lib
+ {
+ found = str.find ( "[LIB]" );
+
+ if ( found == string::npos )
+ {
+ continue;
+ }
+ else
+ {
+ read_stat = 1;
+ asm_flags = 0;
+ }
+ }
+ else if ( read_stat == 1 ) //start reading a lib fetch asm flags
+ {
+ //split by "="
+ found = str.find ( "asm_flags" );
+
+ if ( found == string::npos )
+ {
+ continue;
+ }
+ else
+ {
+ found = str.find ( "=" );
+ str = str.substr ( found + 1, str.size() - found );
+
+ if ( str.size() == 0 )
+ {
+ fprintf ( stderr, "ERROR: please check asm_flags in lib file\n" );
+ exit ( -1 );
+ }
+
+ asm_flags = atoi ( str.c_str() );
+
+ if ( asm_flags == 1 || asm_flags == 3 )
+ {
+ read_stat = 2;
+ }
+ else
+ {
+ read_stat = 0; // next lib
+ }
+ }
+ }
+ else if ( read_stat == 2 ) // reading file
+ {
+ found = str.find_first_of ( "fqpb" );
+
+ if ( found == 0 ) //f1 f2 q1 q2 p b
+ {
+ found = str.find ( "=" );
+
+ if ( found > 2 )
+ {
+ continue; // the "=" should be the second or thrid poistion
+ }
+
+ str = str.substr ( found + 1, str.size() - found );
+ filenames.push_back ( str );
+ }
+ else
+ {
+ found = str.find ( "[LIB]" );
+
+ if ( found == string::npos )
+ {
+ continue;
+ }
+ else
+ {
+ read_stat = 1;
+ asm_flags = 0;
+ }
+ }
+ }
+ }
}
@@ -236,11 +246,14 @@ Return:
*************************************************/
void sendIOWorkSignal()
{
- if ( io_ready == 2 ) { return ; } //finish io job
-
- io_stat0 = 0;
- io_stat1 = 0;
- io_ready = 0;
+ if ( io_ready == 2 )
+ {
+ return ; //finish io job
+ }
+
+ io_stat0 = 0;
+ io_stat1 = 0;
+ io_ready = 0;
}
@@ -259,434 +272,444 @@ Return:
None.
*************************************************/
-void * run_io_thread_main ( void * arg )
+void *run_io_thread_main ( void *arg )
{
- io_para_main * paras;
- paras = ( io_para_main * ) arg;
-
- if ( !paras )
- {
- fprintf ( stderr, "ERROR: the argument passed to main io thread is NULL!\n" );
- exit ( -1 );
- }
-
- int read_buf_sz = paras->read_buf_sz;
- vector<string> in_filenames_vt = * ( paras->in_filenames_vt );
- /*
- for(int i=0;i<in_filenames_vt.size();i++){
- printf("%s\n", in_filenames_vt[i].c_str());
- }*/
- int read_num0 = 0;
- int read_num1 = 0;
- char line[1024];
- int read_buf_len = 1024;
- FILE * fp = NULL; //for normal and gzip reading
- samfile_t * fp3 = NULL; //for bam reading
- int filetype = 0; //0 normal 1 gzip 2 bam
- int file_num = 0;
-
- if ( in_filenames_vt.size() >= 1 )
- {
- string temp;
- size_t found;
- found = in_filenames_vt[file_num].find_last_of ( "." );
-
- if ( found == string::npos )
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- else
- {
- temp = in_filenames_vt[file_num].substr ( found );
-
- if ( temp.compare ( ".gz" ) == 0 ) //gzip
- {
- //temp = "gzip -dc ";
- //temp.append(in_filenames_vt[file_num]);
- //fp = popen(temp.c_str(),"r");
- fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 1;
- }
- else if ( temp.compare ( ".bam" ) == 0 ) //bam
- {
- //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
- fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 2;
- }
- else
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- }
-
- if ( !fp && !fp3 )
- {
- fprintf ( stderr, "ERROR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
- exit ( 1 );
- }
- }
- else
- {
- fprintf ( stderr, "ERROR: input filenames vector is empty! please check the reads config file,option \"asm_flags\" is requried!\n" );
- exit ( 1 );
- }
-
- //fprintf(stderr,"processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
- fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
-
- while ( 1 )
- {
- while ( ( io_stat0 ) && ( io_stat1 ) )
- {
- usleep ( 1 );
- }
-
- if ( ! ( io_stat0 ) ) //fill buf0
- {
- io_stat0 = 1;// reading reads stat
- int ready = 0;
- int i = 0;
-
- if ( filetype == 0 || filetype == 1 ) //normal or gzip reading ...
- {
- while ( i < read_buf_sz )
- {
- if ( fgets ( line, read_buf_len, fp ) != NULL )
- {
- switch ( line[0] )
- {
- case '@':
- case '>':
- ready = 1;
- break;
- case '+':
- ready = 0;
- break;
- default:
-
- if ( ready )
- {
- read_buf0[i].clear();
- read_buf0[i].append ( line );
- i++;
- }
- }
- }
- else
- {
- break;
- }
- }
- }
- else if ( filetype == 2 ) //bam reading
- {
- int type = 0;
- char src_name[128];
-
- while ( i < read_buf_sz && readstate >= 0 ) //readstate
- {
- read1seqbam ( line, src_name, read_buf_len, fp3, &type );
-
- if ( type != -1 )
- {
- read_buf0[i].clear();
- read_buf0[i].append ( line );
- //cout<<"line:"<<line<<endl;
- //cout<<"buf0:"<<read_buf0[i]<<endl;
- i++;
- }
- }
- }
- else
- {
- fprintf ( stderr, "ERROR: filetype is not support or filename has a wrong suffix!\n" );
- exit ( -1 );
- }
-
- read_num0 = i;
- reads_all_num += i;
-
- while ( io_ready != 0 ) {usleep ( 1 );}; //wait for main send work sign
-
- if ( i == read_buf_sz )
- {
- io_stat0 = 2;
- }
- else if ( i != read_buf_sz && file_num < in_filenames_vt.size() - 1 ) //still has file unread
- {
- io_stat0 = 2;
-
- if ( filetype == 0 )
- {
- fclose ( fp );
- }
- else if ( filetype == 1 )
- {
- pclose ( fp );
- }
- else if ( filetype == 2 )
- {
- samclose ( fp3 );
- state = -3;
- readstate = 0;
- }
-
- file_num++;
- //open a new file ...
- string temp;
- size_t found;
- found = in_filenames_vt[file_num].find_last_of ( "." );
-
- if ( found == string::npos )
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- else
- {
- temp = in_filenames_vt[file_num].substr ( found );
-
- if ( temp.compare ( ".gz" ) == 0 ) //gzip
- {
- //temp = "gzip -dc ";
- //temp.append(in_filenames_vt[file_num]);
- //fp = popen(temp.c_str(),"r");
- fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 1;
- }
- else if ( temp.compare ( ".bam" ) == 0 ) //bam
- {
- //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
- fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 2;
- }
- else
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- }
-
- if ( !fp && !fp3 )
- {
- fprintf ( stderr, "ERROR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
- exit ( 1 );
- }
-
- //fprintf(stderr, "processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
- fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
- }
- else
- {
- io_stat0 = 3;
- }
-
- seq_t = read_buf0;
- read_num = read_num0;
-
- if ( io_stat0 == 3 )
- {
- //printf("Io thread's job is finished! all reads: %llu \n",reads_all_num);
-
- //close the file...
- if ( filetype == 0 )
- {
- fclose ( fp );
- }
- else if ( filetype == 1 )
- {
- pclose ( fp );
- }
- else if ( filetype == 2 )
- {
- samclose ( fp3 );
- state = -3;
- readstate = 0;
- }
-
- io_ready = 2;
- break;
- }
-
- io_ready = 1;
- }
-
- if ( ! ( io_stat1 ) ) //fill buf1
- {
- io_stat1 = 1; //reading...
- int ready = 0;
- int i = 0;
-
- if ( filetype == 0 || filetype == 1 ) //normal or gzip reading ...
- {
- while ( i < read_buf_sz && fp )
- {
- if ( fgets ( line, read_buf_len, fp ) != NULL )
- {
- switch ( line[0] )
- {
- case '@':
- case '>':
- ready = 1;
- break;
- case '+':
- ready = 0;
- break;
- default:
-
- if ( ready )
- {
- read_buf1[i].clear();
- read_buf1[i].append ( line );
- i++;
- }
- }
- }
- else
- {
- break;
- }
- }
- }
- else if ( filetype == 2 ) //bam reading
- {
- int type = 0;
- char src_name[128];
-
- while ( i < read_buf_sz && readstate >= 0 ) //readstate
- {
- read1seqbam ( line, src_name, read_buf_len, fp3, &type );
-
- if ( type != -1 )
- {
- read_buf1[i].clear();
- read_buf1[i].append ( line );
- i++;
- }
- }
- }
- else
- {
- fprintf ( stderr, "ERROR: filetype is not support or filename has a wrong suffix!\n" );
- exit ( 1 );
- }
-
- read_num1 = i;
- reads_all_num += i;
-
- while ( io_ready != 0 ) {usleep ( 1 );}; //wait for main send work sign
-
- if ( i == read_buf_sz && ( fp || fp3 ) )
- {
- io_stat1 = 2;
- }
- else if ( i != read_buf_sz && file_num < in_filenames_vt.size() - 1 ) //still has file unread
- {
- io_stat1 = 2;
-
- if ( filetype == 0 )
- {
- fclose ( fp );
- }
- else if ( filetype == 1 )
- {
- pclose ( fp );
- }
- else if ( filetype == 2 )
- {
- samclose ( fp3 );
- state = -3;
- readstate = 0;
- }
-
- file_num++;
- //open a new file ...
- string temp;
- size_t found;
- found = in_filenames_vt[file_num].find_last_of ( "." );
-
- if ( found == string::npos )
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- else
- {
- temp = in_filenames_vt[file_num].substr ( found );
-
- if ( temp.compare ( ".gz" ) == 0 ) //gzip
- {
- //temp = "gzip -dc ";
- //temp.append(in_filenames_vt[file_num]);
- //fp = popen(temp.c_str(),"r");
- fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 1;
- }
- else if ( temp.compare ( ".bam" ) == 0 ) //bam
- {
- //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
- fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 2;
- }
- else
- {
- //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
- fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
- filetype = 0;
- }
- }
-
- if ( !fp && !fp3 )
- {
- fprintf ( stderr, "ERRPR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
- exit ( 1 );
- }
-
- //fprintf(stderr,"processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
- fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
- }
- else
- {
- io_stat1 = 3;
- }
-
- seq_t = read_buf1;
- read_num = read_num1;
-
- if ( io_stat1 == 3 )
- {
- //fprintf(stderr,"Io thread's job is finished! all reads: %llu \n",reads_all_num);
- if ( filetype == 0 )
- {
- fclose ( fp );
- }
- else if ( filetype == 1 )
- {
- pclose ( fp );
- }
- else if ( filetype == 2 )
- {
- samclose ( fp3 );
- state = -3;
- readstate = 0;
- }
-
- io_ready = 2;
- break;
- }
-
- io_ready = 1;
- }
- }
-
- return NULL;
+ io_para_main *paras;
+ paras = ( io_para_main * ) arg;
+
+ if ( !paras )
+ {
+ fprintf ( stderr, "ERROR: the argument passed to main io thread is NULL!\n" );
+ exit ( -1 );
+ }
+
+ int read_buf_sz = paras->read_buf_sz;
+ vector<string> in_filenames_vt = * ( paras->in_filenames_vt );
+ /*
+ for(int i=0;i<in_filenames_vt.size();i++){
+ printf("%s\n", in_filenames_vt[i].c_str());
+ }*/
+ int read_num0 = 0;
+ int read_num1 = 0;
+ char line[1024];
+ int read_buf_len = 1024;
+ FILE *fp = NULL; //for normal and gzip reading
+ samfile_t *fp3 = NULL; //for bam reading
+ int filetype = 0; //0 normal 1 gzip 2 bam
+ int file_num = 0;
+
+ if ( in_filenames_vt.size() >= 1 )
+ {
+ string temp;
+ size_t found;
+ found = in_filenames_vt[file_num].find_last_of ( "." );
+
+ if ( found == string::npos )
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ else
+ {
+ temp = in_filenames_vt[file_num].substr ( found );
+
+ if ( temp.compare ( ".gz" ) == 0 ) //gzip
+ {
+ //temp = "gzip -dc ";
+ //temp.append(in_filenames_vt[file_num]);
+ //fp = popen(temp.c_str(),"r");
+ fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 1;
+ }
+ else if ( temp.compare ( ".bam" ) == 0 ) //bam
+ {
+ //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
+ fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 2;
+ }
+ else
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ }
+
+ if ( !fp && !fp3 )
+ {
+ fprintf ( stderr, "ERROR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
+ exit ( 1 );
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: input filenames vector is empty! please check the reads config file,option \"asm_flags\" is requried!\n" );
+ exit ( 1 );
+ }
+
+ //fprintf(stderr,"processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
+ fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
+
+ while ( 1 )
+ {
+ while ( ( io_stat0 ) && ( io_stat1 ) )
+ {
+ usleep ( 1 );
+ }
+
+ if ( ! ( io_stat0 ) ) //fill buf0
+ {
+ io_stat0 = 1;// reading reads stat
+ int ready = 0;
+ int i = 0;
+
+ if ( filetype == 0 || filetype == 1 ) //normal or gzip reading ...
+ {
+ while ( i < read_buf_sz )
+ {
+ if ( fgets ( line, read_buf_len, fp ) != NULL )
+ {
+ switch ( line[0] )
+ {
+ case '@':
+ case '>':
+ ready = 1;
+ break;
+
+ case '+':
+ ready = 0;
+ break;
+
+ default:
+
+ if ( ready )
+ {
+ read_buf0[i].clear();
+ read_buf0[i].append ( line );
+ i++;
+ }
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ else if ( filetype == 2 ) //bam reading
+ {
+ int type = 0;
+ char src_name[128];
+
+ while ( i < read_buf_sz && readstate >= 0 ) //readstate
+ {
+ read1seqbam ( line, src_name, read_buf_len, fp3, &type );
+
+ if ( type != -1 )
+ {
+ read_buf0[i].clear();
+ read_buf0[i].append ( line );
+ //cout<<"line:"<<line<<endl;
+ //cout<<"buf0:"<<read_buf0[i]<<endl;
+ i++;
+ }
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: filetype is not support or filename has a wrong suffix!\n" );
+ exit ( -1 );
+ }
+
+ read_num0 = i;
+ reads_all_num += i;
+
+ while ( io_ready != 0 )
+ {
+ usleep ( 1 );
+ }; //wait for main send work sign
+
+ if ( i == read_buf_sz )
+ {
+ io_stat0 = 2;
+ }
+ else if ( i != read_buf_sz && file_num < in_filenames_vt.size() - 1 ) //still has file unread
+ {
+ io_stat0 = 2;
+
+ if ( filetype == 0 )
+ {
+ fclose ( fp );
+ }
+ else if ( filetype == 1 )
+ {
+ pclose ( fp );
+ }
+ else if ( filetype == 2 )
+ {
+ samclose ( fp3 );
+ state = -3;
+ readstate = 0;
+ }
+
+ file_num++;
+ //open a new file ...
+ string temp;
+ size_t found;
+ found = in_filenames_vt[file_num].find_last_of ( "." );
+
+ if ( found == string::npos )
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ else
+ {
+ temp = in_filenames_vt[file_num].substr ( found );
+
+ if ( temp.compare ( ".gz" ) == 0 ) //gzip
+ {
+ //temp = "gzip -dc ";
+ //temp.append(in_filenames_vt[file_num]);
+ //fp = popen(temp.c_str(),"r");
+ fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 1;
+ }
+ else if ( temp.compare ( ".bam" ) == 0 ) //bam
+ {
+ //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
+ fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 2;
+ }
+ else
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ }
+
+ if ( !fp && !fp3 )
+ {
+ fprintf ( stderr, "ERROR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
+ exit ( 1 );
+ }
+
+ //fprintf(stderr, "processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
+ fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
+ }
+ else
+ {
+ io_stat0 = 3;
+ }
+
+ seq_t = read_buf0;
+ read_num = read_num0;
+
+ if ( io_stat0 == 3 )
+ {
+ //printf("Io thread's job is finished! all reads: %llu \n",reads_all_num);
+
+ //close the file...
+ if ( filetype == 0 )
+ {
+ fclose ( fp );
+ }
+ else if ( filetype == 1 )
+ {
+ pclose ( fp );
+ }
+ else if ( filetype == 2 )
+ {
+ samclose ( fp3 );
+ state = -3;
+ readstate = 0;
+ }
+
+ io_ready = 2;
+ break;
+ }
+
+ io_ready = 1;
+ }
+
+ if ( ! ( io_stat1 ) ) //fill buf1
+ {
+ io_stat1 = 1; //reading...
+ int ready = 0;
+ int i = 0;
+
+ if ( filetype == 0 || filetype == 1 ) //normal or gzip reading ...
+ {
+ while ( i < read_buf_sz && fp )
+ {
+ if ( fgets ( line, read_buf_len, fp ) != NULL )
+ {
+ switch ( line[0] )
+ {
+ case '@':
+ case '>':
+ ready = 1;
+ break;
+
+ case '+':
+ ready = 0;
+ break;
+
+ default:
+
+ if ( ready )
+ {
+ read_buf1[i].clear();
+ read_buf1[i].append ( line );
+ i++;
+ }
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+ else if ( filetype == 2 ) //bam reading
+ {
+ int type = 0;
+ char src_name[128];
+
+ while ( i < read_buf_sz && readstate >= 0 ) //readstate
+ {
+ read1seqbam ( line, src_name, read_buf_len, fp3, &type );
+
+ if ( type != -1 )
+ {
+ read_buf1[i].clear();
+ read_buf1[i].append ( line );
+ i++;
+ }
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: filetype is not support or filename has a wrong suffix!\n" );
+ exit ( 1 );
+ }
+
+ read_num1 = i;
+ reads_all_num += i;
+
+ while ( io_ready != 0 )
+ {
+ usleep ( 1 );
+ }; //wait for main send work sign
+
+ if ( i == read_buf_sz && ( fp || fp3 ) )
+ {
+ io_stat1 = 2;
+ }
+ else if ( i != read_buf_sz && file_num < in_filenames_vt.size() - 1 ) //still has file unread
+ {
+ io_stat1 = 2;
+
+ if ( filetype == 0 )
+ {
+ fclose ( fp );
+ }
+ else if ( filetype == 1 )
+ {
+ pclose ( fp );
+ }
+ else if ( filetype == 2 )
+ {
+ samclose ( fp3 );
+ state = -3;
+ readstate = 0;
+ }
+
+ file_num++;
+ //open a new file ...
+ string temp;
+ size_t found;
+ found = in_filenames_vt[file_num].find_last_of ( "." );
+
+ if ( found == string::npos )
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ else
+ {
+ temp = in_filenames_vt[file_num].substr ( found );
+
+ if ( temp.compare ( ".gz" ) == 0 ) //gzip
+ {
+ //temp = "gzip -dc ";
+ //temp.append(in_filenames_vt[file_num]);
+ //fp = popen(temp.c_str(),"r");
+ fp = ( FILE * ) open_file_robust ( "gz", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 1;
+ }
+ else if ( temp.compare ( ".bam" ) == 0 ) //bam
+ {
+ //fp3 = openFile4readb(in_filenames_vt[file_num].c_str());
+ fp3 = ( samfile_t * ) open_file_robust ( "bam", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 2;
+ }
+ else
+ {
+ //fp = fopen(in_filenames_vt[file_num].c_str(),"r"); //normal
+ fp = ( FILE * ) open_file_robust ( "plain", in_filenames_vt[file_num].c_str(), "r" );
+ filetype = 0;
+ }
+ }
+
+ if ( !fp && !fp3 )
+ {
+ fprintf ( stderr, "ERRPR: can't open file %s \n", in_filenames_vt[file_num].c_str() );
+ exit ( 1 );
+ }
+
+ //fprintf(stderr,"processing file %d %s \n",file_num,in_filenames_vt[file_num].c_str());
+ fprintf ( stderr, "Import reads from file:\n %s\n", in_filenames_vt[file_num].c_str() );
+ }
+ else
+ {
+ io_stat1 = 3;
+ }
+
+ seq_t = read_buf1;
+ read_num = read_num1;
+
+ if ( io_stat1 == 3 )
+ {
+ //fprintf(stderr,"Io thread's job is finished! all reads: %llu \n",reads_all_num);
+ if ( filetype == 0 )
+ {
+ fclose ( fp );
+ }
+ else if ( filetype == 1 )
+ {
+ pclose ( fp );
+ }
+ else if ( filetype == 2 )
+ {
+ samclose ( fp3 );
+ state = -3;
+ readstate = 0;
+ }
+
+ io_ready = 2;
+ break;
+ }
+
+ io_ready = 1;
+ }
+ }
+
+ return NULL;
}
@@ -705,121 +728,129 @@ Output:
Return:
None.
*************************************************/
-void read1seqbam ( char * src_seq, char * src_name, int max_read_len, samfile_t * in, int * type ) //read one sequence from bam file
+void read1seqbam ( char *src_seq, char *src_name, int max_read_len, samfile_t *in, int *type ) //read one sequence from bam file
{
- bam1_t * b = bam_init1 ();
- char c;
- char * line1 = NULL;
- int n = 0;
- int len;
- int i, j;
- char * seq1;
- unsigned int flag1 = 0;
- *type = 0;
- readstate = 0;
-
- if ( ( readstate = samread ( in, b ) ) >= 0 )
- {
- if ( !__g_skip_aln ( in->header, b ) )
- {
- line1 = bam_format1_core ( in->header, b, in->type >> 2 & 3 );
- }
-
- //printf("%s\n", line2);
- seq1 = strtok ( line1, "\t" );
-
- for ( i = 0; i < 10; i++ )
- {
- if ( i == 0 )
- {
- sscanf ( seq1, "%s", src_name );
- }
- else if ( i == 1 )
- {
- flag1 = atoi ( seq1 );
-
- if ( flag1 & 0x0200 ) //whether it's good or not
- {
- //state(1st read state, 2nd read state) : -3(init), -2(0), -1(1), 0(0, 0), 1(0, 1), 2(1, 0), 3(1, 1)
- switch ( state )
- {
- case -3:
- state = -2;
- break;
- case -2:
- state = 0;
- break;
- case -1:
- state = 2;
- break;
- default:
- state = -3;
- }
- }
- else
- {
- switch ( state )
- {
- case -3:
- state = -1;
- break;
- case -2:
- state = 1;
- break;
- case -1:
- state = 3;
- break;
- default:
- state = -3;
- }
- }
- }
- else if ( i == 9 ) //the sequence
- {
- //printf("%s\n", seq1);
- len = strlen ( seq1 );
-
- if ( len + n > max_read_len )
- { len = max_read_len - n; }
-
- for ( j = 0; j < len; j++ )
- {
- if ( seq1[j] >= 'a' && seq1[j] <= 'z' )
- {
- src_seq[n++] = ( char ) ( seq1[j] - 'a' + 'A' );
- }
- else if ( seq1[j] >= 'A' && seq1[j] <= 'Z' )
- {
- src_seq[n++] = seq1[j];
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( seq1[j] == '.' )
- {
- src_seq[n++] = 'A';
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
-
- if ( 3 == state )
- {
- state = -3;
- }
- else
- {
- if ( 0 == state || 1 == state || 2 == state )
- {
- state = -3;
- *type = -1;
- }
- }
- }
-
- seq1 = strtok ( NULL, "\t" );
- }
- }
-
- free ( line1 );
- bam_destroy1 ( b );
- src_seq[n++] = '\0';
+ bam1_t *b = bam_init1 ();
+ char c;
+ char *line1 = NULL;
+ int n = 0;
+ int len;
+ int i, j;
+ char *seq1;
+ unsigned int flag1 = 0;
+ *type = 0;
+ readstate = 0;
+
+ if ( ( readstate = samread ( in, b ) ) >= 0 )
+ {
+ if ( !__g_skip_aln ( in->header, b ) )
+ {
+ line1 = bam_format1_core ( in->header, b, in->type >> 2 & 3 );
+ }
+
+ //printf("%s\n", line2);
+ seq1 = strtok ( line1, "\t" );
+
+ for ( i = 0; i < 10; i++ )
+ {
+ if ( i == 0 )
+ {
+ sscanf ( seq1, "%s", src_name );
+ }
+ else if ( i == 1 )
+ {
+ flag1 = atoi ( seq1 );
+
+ if ( flag1 & 0x0200 ) //whether it's good or not
+ {
+ //state(1st read state, 2nd read state) : -3(init), -2(0), -1(1), 0(0, 0), 1(0, 1), 2(1, 0), 3(1, 1)
+ switch ( state )
+ {
+ case -3:
+ state = -2;
+ break;
+
+ case -2:
+ state = 0;
+ break;
+
+ case -1:
+ state = 2;
+ break;
+
+ default:
+ state = -3;
+ }
+ }
+ else
+ {
+ switch ( state )
+ {
+ case -3:
+ state = -1;
+ break;
+
+ case -2:
+ state = 1;
+ break;
+
+ case -1:
+ state = 3;
+ break;
+
+ default:
+ state = -3;
+ }
+ }
+ }
+ else if ( i == 9 ) //the sequence
+ {
+ //printf("%s\n", seq1);
+ len = strlen ( seq1 );
+
+ if ( len + n > max_read_len )
+ {
+ len = max_read_len - n;
+ }
+
+ for ( j = 0; j < len; j++ )
+ {
+ if ( seq1[j] >= 'a' && seq1[j] <= 'z' )
+ {
+ src_seq[n++] = ( char ) ( seq1[j] - 'a' + 'A' );
+ }
+ else if ( seq1[j] >= 'A' && seq1[j] <= 'Z' )
+ {
+ src_seq[n++] = seq1[j];
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( seq1[j] == '.' )
+ {
+ src_seq[n++] = 'A';
+ } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+
+ if ( 3 == state )
+ {
+ state = -3;
+ }
+ else
+ {
+ if ( 0 == state || 1 == state || 2 == state )
+ {
+ state = -3;
+ *type = -1;
+ }
+ }
+ }
+
+ seq1 = strtok ( NULL, "\t" );
+ }
+ }
+
+ free ( line1 );
+ bam_destroy1 ( b );
+ src_seq[n++] = '\0';
}
/*************************************************
@@ -834,26 +865,26 @@ Output:
Return:
a samfile pointer
*************************************************/
-static samfile_t * openFile4readb ( const char * fname ) //open file to read bam file
+static samfile_t *openFile4readb ( const char *fname ) //open file to read bam file
{
- samfile_t * in;
- char * fn_list = 0;
-
- if ( ( in = ( samfile_t * ) samopen ( fname, "rb", fn_list ) ) == 0 )
- {
- fprintf ( stderr, "ERROR: Cannot open %s. Now exit to system...\n", fname );
- return NULL;
- //exit (-1);
- }
-
- if ( in->header == 0 )
- {
- fprintf ( stderr, "ERROR: Cannot read the header.\n" );
- return NULL;
- //exit (-1);
- }
-
- return ( in );
+ samfile_t *in;
+ char *fn_list = 0;
+
+ if ( ( in = ( samfile_t * ) samopen ( fname, "rb", fn_list ) ) == 0 )
+ {
+ fprintf ( stderr, "ERROR: Cannot open %s. Now exit to system...\n", fname );
+ return NULL;
+ //exit (-1);
+ }
+
+ if ( in->header == 0 )
+ {
+ fprintf ( stderr, "ERROR: Cannot read the header.\n" );
+ return NULL;
+ //exit (-1);
+ }
+
+ return ( in );
}
@@ -874,72 +905,72 @@ Output:
Return:
A file pointer with void* type
*************************************************/
-void * open_file_robust ( const char * filetype, const char * path, const char * mode )
+void *open_file_robust ( const char *filetype, const char *path, const char *mode )
{
- void * fp = NULL;
- const int max_times = 10;
- const int max_sleep = 60;
- int cur_times = 1;
- int cur_sleep = 1;
-
- while ( !fp )
- {
- if ( strcmp ( filetype, "plain" ) == 0 )
- {
- if ( access ( path, 0 ) == 0 )
- {
- fp = fopen ( path, mode );
- }
- }
- else if ( strcmp ( filetype, "gz" ) == 0 )
- {
- char tmp[256];
- sprintf ( tmp, "gzip -dc %s", path );
-
- if ( access ( path, 0 ) == 0 )
- {
- fp = popen ( tmp, "r" );
- /*
- if(fp && feof((FILE*)fp)){ //it's useless for "file not found but popen success" bug
- pclose((FILE*)fp);
- fp = NULL;
- }*/
- }
- }
- else if ( strcmp ( filetype, "bam" ) == 0 )
- {
- if ( access ( path, 0 ) == 0 )
- {
- fp = openFile4readb ( path );
- }
- }
-
- if ( fp )
- {
- //fprintf(stderr,"%llx \n",fp);
- return fp;
- }
- else
- {
- fprintf ( stderr, "ERROR: open file %s failed!\n", path );
- fprintf ( stderr, "try opening it again after %d seconds\n", cur_sleep );
- sleep ( cur_sleep );
- cur_times ++;
- cur_sleep *= 2;
-
- if ( cur_sleep >= max_sleep )
- {
- cur_sleep = max_sleep;
- }
-
- if ( cur_times > max_times )
- {
- fprintf ( stderr, "ERROR: can't open file %s , now exit system !!!", path );
- exit ( -1 );
- return NULL;
- }
- }
- }
+ void *fp = NULL;
+ const int max_times = 10;
+ const int max_sleep = 60;
+ int cur_times = 1;
+ int cur_sleep = 1;
+
+ while ( !fp )
+ {
+ if ( strcmp ( filetype, "plain" ) == 0 )
+ {
+ if ( access ( path, 0 ) == 0 )
+ {
+ fp = fopen ( path, mode );
+ }
+ }
+ else if ( strcmp ( filetype, "gz" ) == 0 )
+ {
+ char tmp[256];
+ sprintf ( tmp, "gzip -dc %s", path );
+
+ if ( access ( path, 0 ) == 0 )
+ {
+ fp = popen ( tmp, "r" );
+ /*
+ if(fp && feof((FILE*)fp)){ //it's useless for "file not found but popen success" bug
+ pclose((FILE*)fp);
+ fp = NULL;
+ }*/
+ }
+ }
+ else if ( strcmp ( filetype, "bam" ) == 0 )
+ {
+ if ( access ( path, 0 ) == 0 )
+ {
+ fp = openFile4readb ( path );
+ }
+ }
+
+ if ( fp )
+ {
+ //fprintf(stderr,"%llx \n",fp);
+ return fp;
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: open file %s failed!\n", path );
+ fprintf ( stderr, "try opening it again after %d seconds\n", cur_sleep );
+ sleep ( cur_sleep );
+ cur_times ++;
+ cur_sleep *= 2;
+
+ if ( cur_sleep >= max_sleep )
+ {
+ cur_sleep = max_sleep;
+ }
+
+ if ( cur_times > max_times )
+ {
+ fprintf ( stderr, "ERROR: can't open file %s , now exit system !!!", path );
+ exit ( -1 );
+ return NULL;
+ }
+ }
+ }
}
diff --git a/sparsePregraph/main.cpp b/sparsePregraph/main.cpp
index cd65a85..51f453c 100644
--- a/sparsePregraph/main.cpp
+++ b/sparsePregraph/main.cpp
@@ -1,11 +1,11 @@
#include <stdio.h>
#include <stdlib.h>
-extern "C" int call_pregraph_sparse ( int argc, char ** argv );
+extern "C" int call_pregraph_sparse ( int argc, char **argv );
-int main ( int argc, char ** argv )
+int main ( int argc, char **argv )
{
- fprintf ( stderr, "\nVersion 1.0.3: released on July 13th, 2012\nCompile %s\t%s\n\n", __DATE__, __TIME__ );
- call_pregraph_sparse ( argc, argv );
+ fprintf ( stderr, "\nVersion 1.0.3: released on July 13th, 2012\nCompile %s\t%s\n\n", __DATE__, __TIME__ );
+ call_pregraph_sparse ( argc, argv );
}
diff --git a/sparsePregraph/multi_threads.cpp b/sparsePregraph/multi_threads.cpp
index 5aa755d..2dc5700 100644
--- a/sparsePregraph/multi_threads.cpp
+++ b/sparsePregraph/multi_threads.cpp
@@ -1,7 +1,7 @@
/*
* multi_threads.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,100 +29,112 @@
#include "build_preArc.h"
-void creatThrds ( pthread_t * threads, PARAMETER * paras )
+void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num_s; i++ )
- {
- if ( ( temp = pthread_create ( &threads[i], NULL, threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "ERROR: create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- //fprintf(stderr,"%d work threads created.\n",thrd_num_s);
- fprintf ( stderr, "%d work threads initialized.\n", thrd_num_s );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num_s; i++ )
+ {
+ if ( ( temp = pthread_create ( &threads[i], NULL, threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "ERROR: create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ //fprintf(stderr,"%d work threads created.\n",thrd_num_s);
+ fprintf ( stderr, "%d work threads initialized.\n", thrd_num_s );
}
-void * threadRoutine ( void * para )
+void *threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
- struct hashtable2 * ht = prm->ht;
- int K_size = prm->K_size;
- int gap = prm->gap;
-
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 3 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
- else if ( * ( prm->selfSignal ) == 10 )
- {
- run_process_threaded ( ht, locks, K_size, gap, read_num, thrd_num_s, prm->threadID, 1 );
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 11 )
- {
- run_process_threaded ( ht, locks, K_size, gap, read_num, thrd_num_s, prm->threadID, 2 );
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 12 )
- {
- for ( int i = prm->threadID ; i < read_num; i += thrd_num_s )
- {
- int bad_flag = 0;
- filter_N ( seq_t[i], bad_flag );
-
- if ( bad_flag ) {seq_t[i].clear(); continue;}
-
- process_1read_preArc ( prm->preArcs, locks, prm->threadID, prm->v_ht, K_size, prm->cut_off_len, seq_t[i].c_str() );
- }
-
- * ( prm->selfSignal ) = 0;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ int i;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+ struct hashtable2 *ht = prm->ht;
+ int K_size = prm->K_size;
+ int gap = prm->gap;
+
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 3 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+ else if ( * ( prm->selfSignal ) == 10 )
+ {
+ run_process_threaded ( ht, locks, K_size, gap, read_num, thrd_num_s, prm->threadID, 1 );
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 11 )
+ {
+ run_process_threaded ( ht, locks, K_size, gap, read_num, thrd_num_s, prm->threadID, 2 );
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 12 )
+ {
+ for ( int i = prm->threadID ; i < read_num; i += thrd_num_s )
+ {
+ int bad_flag = 0;
+ filter_N ( seq_t[i], bad_flag );
+
+ if ( bad_flag )
+ {
+ seq_t[i].clear();
+ continue;
+ }
+
+ process_1read_preArc ( prm->preArcs, locks, prm->threadID, prm->v_ht, K_size, prm->cut_off_len, seq_t[i].c_str() );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+
+ usleep ( 1 );
+ }
}
-void thread_wait ( pthread_t * threads )
+void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num_s; i++ )
- if ( threads[i] != 0 )
- { pthread_join ( threads[i], NULL ); }
+ for ( i = 0; i < thrd_num_s; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
-void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num_s; t++ )
- { thrdSignals[t + 1] = SIG; }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num_s; t++ )
- if ( thrdSignals[t + 1] )
- { break; }
-
- if ( t == thrd_num_s )
- { break; }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num_s; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num_s; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num_s )
+ {
+ break;
+ }
+ }
}
diff --git a/sparsePregraph/pregraph_sparse.cpp b/sparsePregraph/pregraph_sparse.cpp
index cd422f3..3b24592 100644
--- a/sparsePregraph/pregraph_sparse.cpp
+++ b/sparsePregraph/pregraph_sparse.cpp
@@ -1,7 +1,7 @@
/*
* pregraph_sparse.cpp
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -38,8 +38,8 @@
static int LOAD_GRAPH = 0, BUILD_DBG = 1, BUILD_EDGES = 1, BUILD_PREARCS = 1;
//static int run_mode=0;
-extern "C" int call_pregraph_sparse ( int argc, char ** argv );
-static void initenv ( int argc, char ** argv );
+extern "C" int call_pregraph_sparse ( int argc, char **argv );
+static void initenv ( int argc, char **argv );
static void parse_args ( vector<string> &in_filenames_vt );
static void display_pregraph_usage();
@@ -76,619 +76,636 @@ Output:
Return:
None.
*************************************************/
-extern "C" int call_pregraph_sparse ( int argc, char ** argv )
+extern "C" int call_pregraph_sparse ( int argc, char **argv )
{
- time_t all_beg_time, all_end_time;
- time ( &all_beg_time );
- initenv ( argc, argv );
- vector<string> in_filenames_vt;
- parse_args ( in_filenames_vt );
- struct hashtable2 ht2;
- size_t hashTableSZ = 1000000;
- time_t beg_time, read_time;
- size_t bucket_count = 0, edge_cnt = 0;
-
- if ( ( !LOAD_GRAPH ) && BUILD_DBG )
- {
- int round = 1;
-
- for ( round = 1; round <= 2; round++ )
- {
- //fprintf(stderr,"Building the sparse de Brujin graph, round: %d\n",round);
- fprintf ( stderr, "Start to build the sparse de Brujin graph, round: %d\n", round );
-
- if ( round == 1 )
- {
- time ( &beg_time );
- edge_cnt = 0;
- uint64_t TotalSamplings = 0;
-
- //initialize the hashtable size
- if ( GenomeSize == 0 )
- {
- fprintf ( stderr, "Error! Genome size not given.\n" );
- return -1;
- }
-
- hashTableSZ = ( size_t ) GenomeSize / max ( gap - 1, 5 );
- int read_buf_sz = 102400 * thrd_num_s;
- Init_HT2 ( &ht2, hashTableSZ );
- //create main io thread
- read_buf0 = new string[read_buf_sz];
- read_buf1 = new string[read_buf_sz];
- io_stat0 = 1; //must be one, if io_stat0 equals 0, the io threads will work immediately
- io_stat1 = 1;
- io_ready = 0;
- io_para_main io_para_mains;
- io_para_mains.read_buf_sz = read_buf_sz;
- io_para_mains.in_filenames_vt = &in_filenames_vt;
- pthread_t io_thread;
- int temp;
-
- // fprintf(stderr,"Creating main io thread ...\n");
- if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
- {
- fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
- exit ( -1 );
- }
-
- fprintf ( stderr, "1 io thread initialized.\n" );
- //create work threads for round 1
- pthread_t threads[thrd_num_s];
- unsigned char thrdSignal[thrd_num_s + 1];
- PARAMETER paras[thrd_num_s];
- locks = ( pthread_spinlock_t * ) calloc ( ht2.ht_sz, sizeof ( pthread_spinlock_t ) );
-
- //initialize the locks unlock
- for ( size_t i = 0; i < ht2.ht_sz; ++i )
- {
- locks[i] = 1;
- }
-
- //create threads
- //fprintf(stderr,"Creating work threads ...\n");
- bucket_count_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
- edge_cnt_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
-
- for ( int k = 0; k < thrd_num_s; k++ )
- {
- thrdSignal[k + 1] = 0;
- paras[k].threadID = k;
- paras[k].mainSignal = &thrdSignal[0];
- paras[k].selfSignal = &thrdSignal[k + 1];
- paras[k].ht = &ht2;
- paras[k].K_size = K_size;
- paras[k].gap = gap;
- }
-
- creatThrds ( threads, paras );
- thrdSignal[0] = 0;
- //begin to work
- size_t processed_reads = 0;
-
- while ( 1 )
- {
- sendIOWorkSignal();
-
- while ( io_ready == 0 ) {usleep ( 1 );}
-
- if ( io_ready )
- {
- processed_reads += read_num;
- sendWorkSignal ( 10, thrdSignal );
-
- for ( int k1 = 0; k1 < thrd_num_s; ++k1 )
- {
- bucket_count += bucket_count_total[k1];
- bucket_count_total[k1] = 0;
- }
- }
-
- if ( io_ready == 2 )
- {
- //fprintf(stderr,"All reads have been processed !\n");
- break;
- }
- }
-
- sendWorkSignal ( 3, thrdSignal );
- thread_wait ( threads );
- SwitchBuckets ( &ht2, K_size );
-
- for ( size_t i = 0; i < ht2.ht_sz; ++i ) //this procedure can be removed
- {
- struct bucket2 * bktptr = ht2.store_pos[i];
-
- while ( bktptr != NULL )
- {
- bktptr->kmer_info.cov1 = 0;
- bktptr = bktptr->nxt_bucket;
- }
- }
-
- free ( ( void * ) bucket_count_total );
- free ( ( void * ) locks );
- free ( ( void * ) edge_cnt_total );
- delete [] read_buf0;
- delete [] read_buf1;
- time ( &read_time );
- //fprintf(stderr,"Round 1 consumes time: %.fs.\n",difftime(read_time,beg_time));
- //fprintf(stderr,"Number of processed reads: %llu \n",processed_reads);
- fprintf ( stderr, "Time spent on building graph round 1: %.fs, %llu reads processed, %llu nodes allocated\n",
- difftime ( read_time, beg_time ), processed_reads, bucket_count );
- fprintf ( stderr, "\n" );
- }
-
- if ( round == 2 )
- {
- time ( &beg_time );
- edge_cnt = 0;
- //create main io thread
- int read_buf_sz = 102400 * thrd_num_s;
- read_buf0 = new string[read_buf_sz];
- read_buf1 = new string[read_buf_sz];
- io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
- io_stat1 = 1;
- io_ready = 0;
- io_para_main io_para_mains;
- io_para_mains.read_buf_sz = read_buf_sz;
- io_para_mains.in_filenames_vt = &in_filenames_vt;
- pthread_t io_thread;
- int temp;
-
- //fprintf(stderr,"Creating main io thread ...\n");
- if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
- {
- fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
- exit ( -1 );
- }
-
- fprintf ( stderr, "1 io thread initialized.\n" );
- //create work threads for round 2
- pthread_t threads[thrd_num_s];
- unsigned char thrdSignal[thrd_num_s + 1];
- PARAMETER paras[thrd_num_s];
- locks = ( pthread_spinlock_t * ) calloc ( ht2.ht_sz, sizeof ( pthread_spinlock_t ) );
-
- //initialize locks unlock
- for ( size_t i = 0; i < ht2.ht_sz; ++i )
- {
- locks[i] = 1;
- }
-
- //create threads
- //fprintf(stderr,"Creating work threads ...\n");
- bucket_count_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
- edge_cnt_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
-
- for ( int k = 0; k < thrd_num_s; k++ )
- {
- thrdSignal[k + 1] = 0;
- paras[k].threadID = k;
- paras[k].mainSignal = &thrdSignal[0];
- paras[k].selfSignal = &thrdSignal[k + 1];
- paras[k].ht = &ht2;
- paras[k].K_size = K_size;
- paras[k].gap = gap;
- }
-
- creatThrds ( threads, paras );
- thrdSignal[0] = 0;
- //begin to work
- int foundcount = 0;
- int flipcount = 0;
- size_t processed_reads = 0;
-
- while ( 1 )
- {
- sendIOWorkSignal();
-
- while ( io_ready == 0 ) {usleep ( 1 );}
-
- if ( io_ready )
- {
- //read_c = read_num;
- processed_reads += read_num;
- sendWorkSignal ( 11, thrdSignal );
-
- for ( int k1 = 0; k1 < thrd_num_s; ++k1 )
- {
- edge_cnt += edge_cnt_total[k1];
- edge_cnt_total[k1] = 0;
- }
- }
-
- if ( io_ready == 2 )
- {
- //fprintf(stderr,"All reads have been processed !\n");
- break;
- }
- }
-
- sendWorkSignal ( 3, thrdSignal );
- thread_wait ( threads );
- free ( ( void * ) bucket_count_total );
- free ( ( void * ) locks );
- free ( ( void * ) edge_cnt_total );
- delete [] read_buf0;
- delete [] read_buf1;
- SavingSparseKmerGraph2 ( &ht2, graphfile );
- time ( &read_time );
- //fprintf(stderr,"Round 2 consumed time: %.fs.\n",difftime(read_time,beg_time));
- //fprintf(stderr,"Number of processed reads: %llu \n",processed_reads);
- fprintf ( stderr, "Time spent on building graph round 2: %.fs, %llu reads processed\n", difftime ( read_time, beg_time ), processed_reads );
- fprintf ( stderr, "%llu nodes allocated, %llu kmer-edges allocated.\n", bucket_count, edge_cnt );
- fprintf ( stderr, "\n" );
- }
- }
- }
-
- if ( LOAD_GRAPH )
- {
- fprintf ( stderr, "Loading the graph ...\n" );
- LoadingSparseKmerGraph2 ( &ht2, graphfile );
- }
-
- if ( BUILD_EDGES )
- {
- RemovingWeakNodesAndEdges2 ( &ht2, K_size, NodeCovTh, EdgeCovTh, &bucket_count, &edge_cnt );
- int cut_len_tip = 2 * K_size;
- int tip_c = 0;
- time_t start, finish, interval;
- fprintf ( stderr, "Start to remove tips with minority links.\n" );
- start = time ( NULL );
- removeMinorTips ( &ht2, K_size, cut_len_tip, tip_c );
- finish = time ( NULL );
- interval = ( finish - start ) ;
- //fprintf(stderr,"Removing minor tips consumes %llu s.\n\n",interval);
- fprintf ( stderr, "Time spent on removing tips: %llus.\n\n", interval );
- fprintf ( stderr, "Start to construct edges.\n" );
- start = time ( NULL );
- char outfile[256];
- sprintf ( outfile, "%s.sparse.edge", graphfile );
- kmer2edges ( &ht2, K_size, outfile );
- free_hashtable ( &ht2 );
- char temp[256];
- sprintf ( temp, "%s.sparse.edge", graphfile );
- convert ( temp, K_size, graphfile );
- finish = time ( NULL );
- interval = ( finish - start ) ;
- //fprintf(stderr,"Building edges consumes %llu s.\n\n",interval);
- fprintf ( stderr, "Time spent on constructing edges: %llus.\n\n", interval );
- }
-
- if ( BUILD_PREARCS ) //build preArc
- {
- size_t v_sz, e_sz;
- int K_size;
- char basicInfo[128];
- sprintf ( basicInfo, "%s.preGraphBasic", graphfile );
- FILE * fin;
- char line[1024];
- char str[32];
- fin = fopen ( basicInfo, "r" );
-
- if ( !fin )
- {
- fprintf ( stderr, "ERROR: can't open file %s\n", basicInfo );
- exit ( 1 );
- }
-
- bool a = 0, b = 0;
-
- while ( fgets ( line, 1024, fin ) != NULL )
- {
- if ( line[0] == 'V' ) //VERTEX
- {
- sscanf ( line + 6, "%lu %s %d", &v_sz, str, &K_size );
- a = 1;
- }
-
- if ( line[0] == 'E' ) //EDGEs
- {
- sscanf ( line, "%s %lu", str, &e_sz );
- b = 1;
- break;
- }
- }
-
- if ( !a || !b )
- {
- fprintf ( stderr, "ERROR: preGraphBasic file is in invaild format!\n" );
- exit ( 1 );
- }
-
- vertex_hash2 v_ht;
- preArc_array arc_arr;
- char edge_file[128];
- sprintf ( edge_file, "%s.sparse.edge", graphfile );
- time_t start, finish, interval;
- //step1:
- //fprintf(stderr,"Building vertexes ...\n");
- fprintf ( stderr, "Start to build vertex indexes.\n" );
- start = time ( NULL );
- init_vertex_hash ( &v_ht, v_sz );
- build_vertexes ( &v_ht, K_size, edge_file );
- finish = time ( NULL );
- interval = ( finish - start ) ;
- //fprintf(stderr,"Building vertexes consumes %llu s.\n\n",interval);
- fprintf ( stderr, "Time spent on building vertex indexes: %llus.\n\n", interval );
- //step2:
- //fprintf(stderr,"Building preArcs ...\n");
- fprintf ( stderr, "Start to build preArcs.\n" );
- start = time ( NULL );
- int cut_off_len = 256;//tmp
- init_preArc_array ( &arc_arr, e_sz + 1 );
-
- if ( solve )
- {
- /* initialize the threads' common data mark_on_edge s_locks*/
- mark_on_edge = ( unsigned int * ) calloc ( e_sz + 1, sizeof ( unsigned int ) );
- s_locks = ( pthread_spinlock_t * ) calloc ( e_sz + 1, sizeof ( pthread_spinlock_t ) );
-
- for ( size_t i = 0; i < e_sz + 1; i++ )
- {
- s_locks[i] = 1;
- }
-
- /*buffers for seperate threads*/
- path_buffer = ( edge_path_buffer ** ) calloc ( thrd_num_s, sizeof ( edge_path_buffer ) );
-
- for ( int i = 0; i < thrd_num_s; i++ )
- {
- path_buffer[i] = create_edge_path_buffer ( mark_on_edge, s_locks, buff_size, max_path_length );
- }
-
- /*initialize the output file */
- char mark_file[128], path_file[128];
- sprintf ( mark_file, "%s.markOnEdge", graphfile );
- sprintf ( path_file, "%s.path", graphfile );
- mark_fp = fopen ( mark_file, "w" );
- path_fp = fopen ( path_file, "w" );
- pthread_mutex_init ( &file_lock, NULL );
- }
-
- build_preArc_threaded ( &arc_arr, &v_ht, K_size, cut_off_len, &in_filenames_vt, thrd_num_s );
-
- if ( solve )
- {
- //output mark_on_edge
- size_t markCounter = 0;
-
- for ( size_t i = 1; i <= e_sz; i++ )
- {
- markCounter += mark_on_edge[i];
- fprintf ( mark_fp, "%d\n", mark_on_edge[i] );
- }
-
- fprintf ( stderr, "Total number of marks in file markOnEdge: %lu\n", markCounter );
- fclose ( mark_fp );
-
- //output path_buffer
- for ( int i = 0; i < thrd_num_s; i++ )
- {
- output_edge_path_buffer ( path_buffer[i], path_fp );
- }
-
- fclose ( path_fp );
-
- //destory buffers ...
- for ( int i = 0; i < thrd_num_s; i++ )
- {
- destory_edge_path_buffer ( path_buffer[i] );
- }
-
- free ( ( void * ) mark_on_edge );
- free ( ( void * ) s_locks );
- pthread_mutex_destroy ( &file_lock );
- }
-
- finish = time ( NULL );
- interval = ( finish - start );
- //fprintf(stderr,"Building preArcs consumes %llu s.\n\n",interval);
- fprintf ( stderr, "Time spent on building preArcs: %llus.\n\n", interval );
- char prearc_file[128];
- sprintf ( prearc_file, "%s.preArc", graphfile );
- output_preArcs ( &arc_arr, prearc_file );
- }
-
- time ( &all_end_time );
- fprintf ( stderr, "Overall time spent on constructing lightgraph: %.fm.\n", difftime ( all_end_time, all_beg_time ) / 60 );
- return 0;
+ time_t all_beg_time, all_end_time;
+ time ( &all_beg_time );
+ initenv ( argc, argv );
+ vector<string> in_filenames_vt;
+ parse_args ( in_filenames_vt );
+ struct hashtable2 ht2;
+ size_t hashTableSZ = 1000000;
+ time_t beg_time, read_time;
+ size_t bucket_count = 0, edge_cnt = 0;
+
+ if ( ( !LOAD_GRAPH ) && BUILD_DBG )
+ {
+ int round = 1;
+
+ for ( round = 1; round <= 2; round++ )
+ {
+ //fprintf(stderr,"Building the sparse de Brujin graph, round: %d\n",round);
+ fprintf ( stderr, "Start to build the sparse de Brujin graph, round: %d\n", round );
+
+ if ( round == 1 )
+ {
+ time ( &beg_time );
+ edge_cnt = 0;
+ uint64_t TotalSamplings = 0;
+
+ //initialize the hashtable size
+ if ( GenomeSize == 0 )
+ {
+ fprintf ( stderr, "Error! Genome size not given.\n" );
+ return -1;
+ }
+
+ hashTableSZ = ( size_t ) GenomeSize / max ( gap - 1, 5 );
+ int read_buf_sz = 102400 * thrd_num_s;
+ Init_HT2 ( &ht2, hashTableSZ );
+ //create main io thread
+ read_buf0 = new string[read_buf_sz];
+ read_buf1 = new string[read_buf_sz];
+ io_stat0 = 1; //must be one, if io_stat0 equals 0, the io threads will work immediately
+ io_stat1 = 1;
+ io_ready = 0;
+ io_para_main io_para_mains;
+ io_para_mains.read_buf_sz = read_buf_sz;
+ io_para_mains.in_filenames_vt = &in_filenames_vt;
+ pthread_t io_thread;
+ int temp;
+
+ // fprintf(stderr,"Creating main io thread ...\n");
+ if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
+ {
+ fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
+ exit ( -1 );
+ }
+
+ fprintf ( stderr, "1 io thread initialized.\n" );
+ //create work threads for round 1
+ pthread_t threads[thrd_num_s];
+ unsigned char thrdSignal[thrd_num_s + 1];
+ PARAMETER paras[thrd_num_s];
+ locks = ( pthread_spinlock_t * ) calloc ( ht2.ht_sz, sizeof ( pthread_spinlock_t ) );
+
+ //initialize the locks unlock
+ for ( size_t i = 0; i < ht2.ht_sz; ++i )
+ {
+ locks[i] = 1;
+ }
+
+ //create threads
+ //fprintf(stderr,"Creating work threads ...\n");
+ bucket_count_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
+ edge_cnt_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
+
+ for ( int k = 0; k < thrd_num_s; k++ )
+ {
+ thrdSignal[k + 1] = 0;
+ paras[k].threadID = k;
+ paras[k].mainSignal = &thrdSignal[0];
+ paras[k].selfSignal = &thrdSignal[k + 1];
+ paras[k].ht = &ht2;
+ paras[k].K_size = K_size;
+ paras[k].gap = gap;
+ }
+
+ creatThrds ( threads, paras );
+ thrdSignal[0] = 0;
+ //begin to work
+ size_t processed_reads = 0;
+
+ while ( 1 )
+ {
+ sendIOWorkSignal();
+
+ while ( io_ready == 0 )
+ {
+ usleep ( 1 );
+ }
+
+ if ( io_ready )
+ {
+ processed_reads += read_num;
+ sendWorkSignal ( 10, thrdSignal );
+
+ for ( int k1 = 0; k1 < thrd_num_s; ++k1 )
+ {
+ bucket_count += bucket_count_total[k1];
+ bucket_count_total[k1] = 0;
+ }
+ }
+
+ if ( io_ready == 2 )
+ {
+ //fprintf(stderr,"All reads have been processed !\n");
+ break;
+ }
+ }
+
+ sendWorkSignal ( 3, thrdSignal );
+ thread_wait ( threads );
+ SwitchBuckets ( &ht2, K_size );
+
+ for ( size_t i = 0; i < ht2.ht_sz; ++i ) //this procedure can be removed
+ {
+ struct bucket2 *bktptr = ht2.store_pos[i];
+
+ while ( bktptr != NULL )
+ {
+ bktptr->kmer_info.cov1 = 0;
+ bktptr = bktptr->nxt_bucket;
+ }
+ }
+
+ free ( ( void * ) bucket_count_total );
+ free ( ( void * ) locks );
+ free ( ( void * ) edge_cnt_total );
+ delete [] read_buf0;
+ delete [] read_buf1;
+ time ( &read_time );
+ //fprintf(stderr,"Round 1 consumes time: %.fs.\n",difftime(read_time,beg_time));
+ //fprintf(stderr,"Number of processed reads: %llu \n",processed_reads);
+ fprintf ( stderr, "Time spent on building graph round 1: %.fs, %llu reads processed, %llu nodes allocated\n",
+ difftime ( read_time, beg_time ), processed_reads, bucket_count );
+ fprintf ( stderr, "\n" );
+ }
+
+ if ( round == 2 )
+ {
+ time ( &beg_time );
+ edge_cnt = 0;
+ //create main io thread
+ int read_buf_sz = 102400 * thrd_num_s;
+ read_buf0 = new string[read_buf_sz];
+ read_buf1 = new string[read_buf_sz];
+ io_stat0 = 1; //must be one, if io_stat0 =0 ,the io thread will work immediately
+ io_stat1 = 1;
+ io_ready = 0;
+ io_para_main io_para_mains;
+ io_para_mains.read_buf_sz = read_buf_sz;
+ io_para_mains.in_filenames_vt = &in_filenames_vt;
+ pthread_t io_thread;
+ int temp;
+
+ //fprintf(stderr,"Creating main io thread ...\n");
+ if ( ( temp = pthread_create ( &io_thread, NULL, run_io_thread_main, &io_para_mains ) ) != 0 )
+ {
+ fprintf ( stderr, "ERROR: failed creating main io thread.\n" );
+ exit ( -1 );
+ }
+
+ fprintf ( stderr, "1 io thread initialized.\n" );
+ //create work threads for round 2
+ pthread_t threads[thrd_num_s];
+ unsigned char thrdSignal[thrd_num_s + 1];
+ PARAMETER paras[thrd_num_s];
+ locks = ( pthread_spinlock_t * ) calloc ( ht2.ht_sz, sizeof ( pthread_spinlock_t ) );
+
+ //initialize locks unlock
+ for ( size_t i = 0; i < ht2.ht_sz; ++i )
+ {
+ locks[i] = 1;
+ }
+
+ //create threads
+ //fprintf(stderr,"Creating work threads ...\n");
+ bucket_count_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
+ edge_cnt_total = ( size_t * ) calloc ( thrd_num_s, sizeof ( size_t ) );
+
+ for ( int k = 0; k < thrd_num_s; k++ )
+ {
+ thrdSignal[k + 1] = 0;
+ paras[k].threadID = k;
+ paras[k].mainSignal = &thrdSignal[0];
+ paras[k].selfSignal = &thrdSignal[k + 1];
+ paras[k].ht = &ht2;
+ paras[k].K_size = K_size;
+ paras[k].gap = gap;
+ }
+
+ creatThrds ( threads, paras );
+ thrdSignal[0] = 0;
+ //begin to work
+ int foundcount = 0;
+ int flipcount = 0;
+ size_t processed_reads = 0;
+
+ while ( 1 )
+ {
+ sendIOWorkSignal();
+
+ while ( io_ready == 0 )
+ {
+ usleep ( 1 );
+ }
+
+ if ( io_ready )
+ {
+ //read_c = read_num;
+ processed_reads += read_num;
+ sendWorkSignal ( 11, thrdSignal );
+
+ for ( int k1 = 0; k1 < thrd_num_s; ++k1 )
+ {
+ edge_cnt += edge_cnt_total[k1];
+ edge_cnt_total[k1] = 0;
+ }
+ }
+
+ if ( io_ready == 2 )
+ {
+ //fprintf(stderr,"All reads have been processed !\n");
+ break;
+ }
+ }
+
+ sendWorkSignal ( 3, thrdSignal );
+ thread_wait ( threads );
+ free ( ( void * ) bucket_count_total );
+ free ( ( void * ) locks );
+ free ( ( void * ) edge_cnt_total );
+ delete [] read_buf0;
+ delete [] read_buf1;
+ SavingSparseKmerGraph2 ( &ht2, graphfile );
+ time ( &read_time );
+ //fprintf(stderr,"Round 2 consumed time: %.fs.\n",difftime(read_time,beg_time));
+ //fprintf(stderr,"Number of processed reads: %llu \n",processed_reads);
+ fprintf ( stderr, "Time spent on building graph round 2: %.fs, %llu reads processed\n", difftime ( read_time, beg_time ), processed_reads );
+ fprintf ( stderr, "%llu nodes allocated, %llu kmer-edges allocated.\n", bucket_count, edge_cnt );
+ fprintf ( stderr, "\n" );
+ }
+ }
+ }
+
+ if ( LOAD_GRAPH )
+ {
+ fprintf ( stderr, "Loading the graph ...\n" );
+ LoadingSparseKmerGraph2 ( &ht2, graphfile );
+ }
+
+ if ( BUILD_EDGES )
+ {
+ RemovingWeakNodesAndEdges2 ( &ht2, K_size, NodeCovTh, EdgeCovTh, &bucket_count, &edge_cnt );
+ int cut_len_tip = 2 * K_size;
+ int tip_c = 0;
+ time_t start, finish, interval;
+ fprintf ( stderr, "Start to remove tips with minority links.\n" );
+ start = time ( NULL );
+ removeMinorTips ( &ht2, K_size, cut_len_tip, tip_c );
+ finish = time ( NULL );
+ interval = ( finish - start ) ;
+ //fprintf(stderr,"Removing minor tips consumes %llu s.\n\n",interval);
+ fprintf ( stderr, "Time spent on removing tips: %llus.\n\n", interval );
+ fprintf ( stderr, "Start to construct edges.\n" );
+ start = time ( NULL );
+ char outfile[256];
+ sprintf ( outfile, "%s.sparse.edge", graphfile );
+ kmer2edges ( &ht2, K_size, outfile );
+ free_hashtable ( &ht2 );
+ char temp[256];
+ sprintf ( temp, "%s.sparse.edge", graphfile );
+ convert ( temp, K_size, graphfile );
+ finish = time ( NULL );
+ interval = ( finish - start ) ;
+ //fprintf(stderr,"Building edges consumes %llu s.\n\n",interval);
+ fprintf ( stderr, "Time spent on constructing edges: %llus.\n\n", interval );
+ }
+
+ if ( BUILD_PREARCS ) //build preArc
+ {
+ size_t v_sz, e_sz;
+ int K_size;
+ char basicInfo[128];
+ sprintf ( basicInfo, "%s.preGraphBasic", graphfile );
+ FILE *fin;
+ char line[1024];
+ char str[32];
+ fin = fopen ( basicInfo, "r" );
+
+ if ( !fin )
+ {
+ fprintf ( stderr, "ERROR: can't open file %s\n", basicInfo );
+ exit ( 1 );
+ }
+
+ bool a = 0, b = 0;
+
+ while ( fgets ( line, 1024, fin ) != NULL )
+ {
+ if ( line[0] == 'V' ) //VERTEX
+ {
+ sscanf ( line + 6, "%lu %s %d", &v_sz, str, &K_size );
+ a = 1;
+ }
+
+ if ( line[0] == 'E' ) //EDGEs
+ {
+ sscanf ( line, "%s %lu", str, &e_sz );
+ b = 1;
+ break;
+ }
+ }
+
+ if ( !a || !b )
+ {
+ fprintf ( stderr, "ERROR: preGraphBasic file is in invaild format!\n" );
+ exit ( 1 );
+ }
+
+ vertex_hash2 v_ht;
+ preArc_array arc_arr;
+ char edge_file[128];
+ sprintf ( edge_file, "%s.sparse.edge", graphfile );
+ time_t start, finish, interval;
+ //step1:
+ //fprintf(stderr,"Building vertexes ...\n");
+ fprintf ( stderr, "Start to build vertex indexes.\n" );
+ start = time ( NULL );
+ init_vertex_hash ( &v_ht, v_sz );
+ build_vertexes ( &v_ht, K_size, edge_file );
+ finish = time ( NULL );
+ interval = ( finish - start ) ;
+ //fprintf(stderr,"Building vertexes consumes %llu s.\n\n",interval);
+ fprintf ( stderr, "Time spent on building vertex indexes: %llus.\n\n", interval );
+ //step2:
+ //fprintf(stderr,"Building preArcs ...\n");
+ fprintf ( stderr, "Start to build preArcs.\n" );
+ start = time ( NULL );
+ int cut_off_len = 256;//tmp
+ init_preArc_array ( &arc_arr, e_sz + 1 );
+
+ if ( solve )
+ {
+ /* initialize the threads' common data mark_on_edge s_locks*/
+ mark_on_edge = ( unsigned int * ) calloc ( e_sz + 1, sizeof ( unsigned int ) );
+ s_locks = ( pthread_spinlock_t * ) calloc ( e_sz + 1, sizeof ( pthread_spinlock_t ) );
+
+ for ( size_t i = 0; i < e_sz + 1; i++ )
+ {
+ s_locks[i] = 1;
+ }
+
+ /*buffers for seperate threads*/
+ path_buffer = ( edge_path_buffer ** ) calloc ( thrd_num_s, sizeof ( edge_path_buffer ) );
+
+ for ( int i = 0; i < thrd_num_s; i++ )
+ {
+ path_buffer[i] = create_edge_path_buffer ( mark_on_edge, s_locks, buff_size, max_path_length );
+ }
+
+ /*initialize the output file */
+ char mark_file[128], path_file[128];
+ sprintf ( mark_file, "%s.markOnEdge", graphfile );
+ sprintf ( path_file, "%s.path", graphfile );
+ mark_fp = fopen ( mark_file, "w" );
+ path_fp = fopen ( path_file, "w" );
+ pthread_mutex_init ( &file_lock, NULL );
+ }
+
+ build_preArc_threaded ( &arc_arr, &v_ht, K_size, cut_off_len, &in_filenames_vt, thrd_num_s );
+
+ if ( solve )
+ {
+ //output mark_on_edge
+ size_t markCounter = 0;
+
+ for ( size_t i = 1; i <= e_sz; i++ )
+ {
+ markCounter += mark_on_edge[i];
+ fprintf ( mark_fp, "%d\n", mark_on_edge[i] );
+ }
+
+ fprintf ( stderr, "Total number of marks in file markOnEdge: %lu\n", markCounter );
+ fclose ( mark_fp );
+
+ //output path_buffer
+ for ( int i = 0; i < thrd_num_s; i++ )
+ {
+ output_edge_path_buffer ( path_buffer[i], path_fp );
+ }
+
+ fclose ( path_fp );
+
+ //destory buffers ...
+ for ( int i = 0; i < thrd_num_s; i++ )
+ {
+ destory_edge_path_buffer ( path_buffer[i] );
+ }
+
+ free ( ( void * ) mark_on_edge );
+ free ( ( void * ) s_locks );
+ pthread_mutex_destroy ( &file_lock );
+ }
+
+ finish = time ( NULL );
+ interval = ( finish - start );
+ //fprintf(stderr,"Building preArcs consumes %llu s.\n\n",interval);
+ fprintf ( stderr, "Time spent on building preArcs: %llus.\n\n", interval );
+ char prearc_file[128];
+ sprintf ( prearc_file, "%s.preArc", graphfile );
+ output_preArcs ( &arc_arr, prearc_file );
+ }
+
+ time ( &all_end_time );
+ fprintf ( stderr, "Overall time spent on constructing lightgraph: %.fm.\n", difftime ( all_end_time, all_beg_time ) / 60 );
+ return 0;
}
-static void initenv ( int argc, char ** argv )
+static void initenv ( int argc, char **argv )
{
- int copt;
- int inpseq, outseq, genome_sz;
- extern char * optarg;
- char temp[100];
- optind = 1;
- inpseq = outseq = genome_sz = 0;
-
- while ( ( copt = getopt ( argc, argv, "s:o:K:g:z:d:e:p:m:r:R" ) ) != EOF )
- {
- switch ( copt )
- {
- case 's':
- inpseq = 1;
- sscanf ( optarg, "%s", shortrdsfile );
- continue;
- case 'o':
- outseq = 1;
- sscanf ( optarg, "%s", graphfile );
- continue;
- case 'K':
- sscanf ( optarg, "%s", temp );
- K_size = atoi ( temp );
- continue;
- case 'g':
- sscanf ( optarg, "%s", temp );
- gap = atoi ( temp );
- continue;
- case 'z':
- genome_sz = 1;
- sscanf ( optarg, "%Lu", &GenomeSize );
- continue;
- case 'd':
- sscanf ( optarg, "%s", temp );
- NodeCovTh = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
- continue;
- case 'e':
- sscanf ( optarg, "%s", temp );
- EdgeCovTh = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
- continue;
- case 'R':
- solve = 1;
- continue;
- case 'p':
- sscanf ( optarg, "%s", temp );
- thrd_num_s = atoi ( temp );
- continue;
- case 'm':
- continue;
- case 'r':
- sscanf ( optarg, "%s", temp );
- run_mode = atoi ( temp );
- continue;
- default:
-
- if ( inpseq == 0 || outseq == 0 )
- {
- display_pregraph_usage();
- exit ( -1 );
- }
- }
- }
-
- if ( inpseq == 0 || outseq == 0 || genome_sz == 0 )
- {
- display_pregraph_usage();
- exit ( -1 );
- }
+ int copt;
+ int inpseq, outseq, genome_sz;
+ extern char *optarg;
+ char temp[100];
+ optind = 1;
+ inpseq = outseq = genome_sz = 0;
+
+ while ( ( copt = getopt ( argc, argv, "s:o:K:g:z:d:e:p:m:r:R" ) ) != EOF )
+ {
+ switch ( copt )
+ {
+ case 's':
+ inpseq = 1;
+ sscanf ( optarg, "%s", shortrdsfile );
+ continue;
+
+ case 'o':
+ outseq = 1;
+ sscanf ( optarg, "%s", graphfile );
+ continue;
+
+ case 'K':
+ sscanf ( optarg, "%s", temp );
+ K_size = atoi ( temp );
+ continue;
+
+ case 'g':
+ sscanf ( optarg, "%s", temp );
+ gap = atoi ( temp );
+ continue;
+
+ case 'z':
+ genome_sz = 1;
+ sscanf ( optarg, "%Lu", &GenomeSize );
+ continue;
+
+ case 'd':
+ sscanf ( optarg, "%s", temp );
+ NodeCovTh = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
+ continue;
+
+ case 'e':
+ sscanf ( optarg, "%s", temp );
+ EdgeCovTh = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
+ continue;
+
+ case 'R':
+ solve = 1;
+ continue;
+
+ case 'p':
+ sscanf ( optarg, "%s", temp );
+ thrd_num_s = atoi ( temp );
+ continue;
+
+ case 'm':
+ continue;
+
+ case 'r':
+ sscanf ( optarg, "%s", temp );
+ run_mode = atoi ( temp );
+ continue;
+
+ default:
+
+ if ( inpseq == 0 || outseq == 0 )
+ {
+ display_pregraph_usage();
+ exit ( -1 );
+ }
+ }
+ }
+
+ if ( inpseq == 0 || outseq == 0 || genome_sz == 0 )
+ {
+ display_pregraph_usage();
+ exit ( -1 );
+ }
}
static void parse_args ( vector<string> &in_filenames_vt )
{
- if ( K_size % 2 == 0 )
- {
- K_size--;
- }
+ if ( K_size % 2 == 0 )
+ {
+ K_size--;
+ }
#ifdef _63MER_
- if ( K_size > 63 )
- {
- fprintf ( stderr, "ERROR: Parameter K is set too large, max value is 63.\n" );
- exit ( -1 );
- }
+ if ( K_size > 63 )
+ {
+ fprintf ( stderr, "ERROR: Parameter K is set too large, max value is 63.\n" );
+ exit ( -1 );
+ }
#endif
#ifdef _127MER_
- if ( K_size > 127 )
- {
- fprintf ( stderr, "ERROR: Parameter K is set too large, max value is 127.\n" );
- exit ( -1 );
- }
+ if ( K_size > 127 )
+ {
+ fprintf ( stderr, "ERROR: Parameter K is set too large, max value is 127.\n" );
+ exit ( -1 );
+ }
#endif
- if ( gap > 25 )
- {
- fprintf ( stderr, "ERROR: Parameter g is set too large, max value is 25.\n" );
- exit ( -1 );
- }
-
- //print the args
- fprintf ( stderr, "********************\n" );
- fprintf ( stderr, "SparsePregraph\n" );
- fprintf ( stderr, "********************\n" );
- fprintf ( stderr, "\n" );
- fprintf ( stderr, "Parameters: " );
- fprintf ( stderr, "pregraph_sparse -s %s", shortrdsfile );
- fprintf ( stderr, " -K %d", K_size );
- fprintf ( stderr, " -g %d", gap );
- fprintf ( stderr, " -z %lu", GenomeSize );
- fprintf ( stderr, " -d %d", NodeCovTh );
- fprintf ( stderr, " -e %d", EdgeCovTh );
-
- if ( solve )
- {
- fprintf ( stderr, " -R " );
- }
-
- fprintf ( stderr, " -r %d", run_mode );
- fprintf ( stderr, " -p %d", thrd_num_s );
- fprintf ( stderr, " -o %s\n\n", graphfile );
-
- if ( run_mode == 0 ) //build all
- {
- LOAD_GRAPH = 0;
- BUILD_DBG = 1;
- BUILD_EDGES = 1;
- BUILD_PREARCS = 1;
- }
- else if ( run_mode == 1 ) //build edges ,build preArcs
- {
- LOAD_GRAPH = 1;
- BUILD_EDGES = 1;
- BUILD_PREARCS = 1;
- }
- else if ( run_mode == 2 ) //build graph only
- {
- LOAD_GRAPH = 0;
- BUILD_DBG = 1;
- BUILD_EDGES = 0;
- BUILD_PREARCS = 0;
- }
- else if ( run_mode == 3 ) //build edges only
- {
- LOAD_GRAPH = 1;
- BUILD_EDGES = 1;
- BUILD_PREARCS = 0;
- }
- else if ( run_mode == 4 ) //build preArc only
- {
- LOAD_GRAPH = 0;
- BUILD_DBG = 0;
- BUILD_EDGES = 0;
- BUILD_PREARCS = 1;
- }
- else
- {
- fprintf ( stderr, "ERROR: invalid runMode!\n" );
- exit ( -1 );
- }
-
- read_lib ( in_filenames_vt, shortrdsfile );
- /*
- fprintf(stderr,"The reads for building sparse de Brujin graph:\n");
- for (int i=0;i<in_filenames_vt.size();++i){
- fprintf(stderr,"%s\n",in_filenames_vt[i].c_str());
- }
- fprintf(stderr,"\n");
- */
+ if ( gap > 25 )
+ {
+ fprintf ( stderr, "ERROR: Parameter g is set too large, max value is 25.\n" );
+ exit ( -1 );
+ }
+
+ //print the args
+ fprintf ( stderr, "********************\n" );
+ fprintf ( stderr, "SparsePregraph\n" );
+ fprintf ( stderr, "********************\n" );
+ fprintf ( stderr, "\n" );
+ fprintf ( stderr, "Parameters: " );
+ fprintf ( stderr, "pregraph_sparse -s %s", shortrdsfile );
+ fprintf ( stderr, " -K %d", K_size );
+ fprintf ( stderr, " -g %d", gap );
+ fprintf ( stderr, " -z %lu", GenomeSize );
+ fprintf ( stderr, " -d %d", NodeCovTh );
+ fprintf ( stderr, " -e %d", EdgeCovTh );
+
+ if ( solve )
+ {
+ fprintf ( stderr, " -R " );
+ }
+
+ fprintf ( stderr, " -r %d", run_mode );
+ fprintf ( stderr, " -p %d", thrd_num_s );
+ fprintf ( stderr, " -o %s\n\n", graphfile );
+
+ if ( run_mode == 0 ) //build all
+ {
+ LOAD_GRAPH = 0;
+ BUILD_DBG = 1;
+ BUILD_EDGES = 1;
+ BUILD_PREARCS = 1;
+ }
+ else if ( run_mode == 1 ) //build edges ,build preArcs
+ {
+ LOAD_GRAPH = 1;
+ BUILD_EDGES = 1;
+ BUILD_PREARCS = 1;
+ }
+ else if ( run_mode == 2 ) //build graph only
+ {
+ LOAD_GRAPH = 0;
+ BUILD_DBG = 1;
+ BUILD_EDGES = 0;
+ BUILD_PREARCS = 0;
+ }
+ else if ( run_mode == 3 ) //build edges only
+ {
+ LOAD_GRAPH = 1;
+ BUILD_EDGES = 1;
+ BUILD_PREARCS = 0;
+ }
+ else if ( run_mode == 4 ) //build preArc only
+ {
+ LOAD_GRAPH = 0;
+ BUILD_DBG = 0;
+ BUILD_EDGES = 0;
+ BUILD_PREARCS = 1;
+ }
+ else
+ {
+ fprintf ( stderr, "ERROR: invalid runMode!\n" );
+ exit ( -1 );
+ }
+
+ read_lib ( in_filenames_vt, shortrdsfile );
+ /*
+ fprintf(stderr,"The reads for building sparse de Brujin graph:\n");
+ for (int i=0;i<in_filenames_vt.size();++i){
+ fprintf(stderr,"%s\n",in_filenames_vt[i].c_str());
+ }
+ fprintf(stderr,"\n");
+ */
}
static void display_pregraph_usage()
{
- fprintf ( stderr, "Usage: sparse_pregraph -s configFile -K kmer -z genomeSize -o outputGraph [-g maxKmerEdgeLength -d kmerFreqCutoff -e kmerEdgeFreqCutoff -R -r runMode -p n_cpu]\n" );
- fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
+ fprintf ( stderr, "Usage: sparse_pregraph -s configFile -K kmer -z genomeSize -o outputGraph [-g maxKmerEdgeLength -d kmerFreqCutoff -e kmerEdgeFreqCutoff -R -r runMode -p n_cpu]\n" );
+ fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
#ifdef _63MER_
- fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
#endif
#ifdef _127MER_
- fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" );
#endif
- fprintf ( stderr, " -g <int> maxKmerEdgeLength(min 1, max 25): number of skipped intermediate kmers, [15]\n" );
- fprintf ( stderr, " -z <int> genomeSize(required): estimated genome size\n" );
- fprintf ( stderr, " -d <int> kmerFreqCutoff: delete kmers with frequency no larger than,[1]\n" );
- fprintf ( stderr, " -e <int> kmerEdgeFreqCutoff: delete kmers' related edge with frequency no larger than [1]\n" );
- fprintf ( stderr, " -R (optional) output extra information for resolving repeats in contig step, [NO]\n" );
- fprintf ( stderr, " -r <int> runMode: 0 build graph & build edge and preArc, 1 load graph by prefix & build edge and preArc, 2 build graph only, 3 build edges only, 4 build preArcs only [0] \n" );
- fprintf ( stderr, " -p <int> n_cpu: number of cpu for use,[8]\n" );
- fprintf ( stderr, " -o <int> outputGraph: prefix of output graph file name\n" );
+ fprintf ( stderr, " -g <int> maxKmerEdgeLength(min 1, max 25): number of skipped intermediate kmers, [15]\n" );
+ fprintf ( stderr, " -z <int> genomeSize(required): estimated genome size\n" );
+ fprintf ( stderr, " -d <int> kmerFreqCutoff: delete kmers with frequency no larger than,[1]\n" );
+ fprintf ( stderr, " -e <int> kmerEdgeFreqCutoff: delete kmers' related edge with frequency no larger than [1]\n" );
+ fprintf ( stderr, " -R (optional) output extra information for resolving repeats in contig step, [NO]\n" );
+ fprintf ( stderr, " -r <int> runMode: 0 build graph & build edge and preArc, 1 load graph by prefix & build edge and preArc, 2 build graph only, 3 build edges only, 4 build preArcs only [0] \n" );
+ fprintf ( stderr, " -p <int> n_cpu: number of cpu for use,[8]\n" );
+ fprintf ( stderr, " -o <int> outputGraph: prefix of output graph file name\n" );
}
diff --git a/standardPregraph/Makefile b/standardPregraph/Makefile
index 9716391..d0caaaa 100644
--- a/standardPregraph/Makefile
+++ b/standardPregraph/Makefile
@@ -9,9 +9,9 @@ CC= gcc
GCCVERSIONMAJOR := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONMINOR := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 4)
ifdef debug
-CFLAGS= -O0 -g -fomit-frame-pointer #-static #-mcrc32 -march=core2 -msse4.1 -msse4.2
+CFLAGS= -O0 -g -fomit-frame-pointer #-msse4.2
else
-CFLAGS= -O4 -fomit-frame-pointer #-static #-mcrc32 -march=core2 -msse4.1 -msse4.2
+CFLAGS= -O3 -fomit-frame-pointer -w #-msse4.2
endif
DFLAGS=
OBJS= arc.o attachPEinfo.o bubble.o check.o compactEdge.o \
@@ -49,15 +49,6 @@ EXTRA_FLAGS += -Wl,--hash-style=both
LIBS += -lbam -lrt
endif
-ifneq (,$(findstring Unix,$(shell uname)))
-EXTRA_FLAGS += -Wl,--hash-style=both
-LIBS += -lbam -lrt
-endif
-
-ifneq (,$(findstring Darwin,$(shell uname)))
-LIBS += -lbammac
-endif
-
ifneq (,$(findstring $(shell uname -m), x86_64))
CFLAGS += -m64
endif
@@ -73,12 +64,11 @@ endif
.SUFFIXES:.c .o
.c.o:
- @printf "Compiling $<... \r"; \
- $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< || echo "Error in command: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $<"
-
-all: clean $(OBJS)
+ @printf "Compiling $<... \r"
+ @$(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< || echo "Error in command: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $<"
-#SOAPdenovo
+all: $(OBJS)
+ @printf "$(PROG) objects generated. \n"
.PHONY:all clean install
@@ -88,14 +78,10 @@ envTest:
@test $(GCCVERSIONMINOR) == 1 || sh -c 'echo "GCC version lower than 4.4.0";false;'
SOAPdenovo: envTest $(OBJS)
- @printf "Linking... \r"
+ @printf "Linking... \r"
@$(CC) $(CFLAGS) -o $(PROG) $(OBJS) $(LIBPATH) $(LIBS) $(ENTRAFLAGS)
- @printf "$(PROG) compilation done.\n";
+ @printf "$(PROG) compilation done. \n"
clean:
@rm -fr gmon.out *.o a.out *.exe *.dSYM $(PROG) *~ *.a *.so.* *.so *.dylib
- @printf "$(PROG) cleaning done.\n";
-
-install:
- @cp $(PROG) ../bin/
- @printf "$(PROG) installed at ../bin/$(PROG)\n"
+ @printf "$(PROG) cleaning done. \n"
diff --git a/standardPregraph/arc.c b/standardPregraph/arc.c
index 32cf554..ddf3069 100644
--- a/standardPregraph/arc.c
+++ b/standardPregraph/arc.c
@@ -1,7 +1,7 @@
/*
* arc.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -30,229 +30,229 @@
void createPreArcMemManager ()
{
- prearc_mem_manager = createMem_manager ( preARCBLOCKSIZE, sizeof ( preARC ) );
+ prearc_mem_manager = createMem_manager ( preARCBLOCKSIZE, sizeof ( preARC ) );
}
void prlDestroyPreArcMem ()
{
- if ( !preArc_mem_managers )
- {
- return;
- }
+ if ( !preArc_mem_managers )
+ {
+ return;
+ }
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- {
- freeMem_manager ( preArc_mem_managers[i] );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ freeMem_manager ( preArc_mem_managers[i] );
+ }
- free ( ( void * ) preArc_mem_managers );
- preArc_mem_managers = NULL;
+ free ( ( void * ) preArc_mem_managers );
+ preArc_mem_managers = NULL;
}
void destroyPreArcMem ()
{
- freeMem_manager ( prearc_mem_manager );
- prearc_mem_manager = NULL;
+ freeMem_manager ( prearc_mem_manager );
+ prearc_mem_manager = NULL;
}
-preARC * prlAllocatePreArc ( unsigned int edgeid, MEM_MANAGER * manager )
+preARC *prlAllocatePreArc ( unsigned int edgeid, MEM_MANAGER *manager )
{
- preARC * newArc;
- newArc = ( preARC * ) getItem ( manager );
- newArc->to_ed = edgeid;
- newArc->multiplicity = 1;
- newArc->next = NULL;
- return newArc;
+ preARC *newArc;
+ newArc = ( preARC * ) getItem ( manager );
+ newArc->to_ed = edgeid;
+ newArc->multiplicity = 1;
+ newArc->next = NULL;
+ return newArc;
}
-preARC * allocatePreArc ( unsigned int edgeid )
+preARC *allocatePreArc ( unsigned int edgeid )
{
- arcCounter++;
- preARC * newArc;
- newArc = ( preARC * ) getItem ( prearc_mem_manager );
- newArc->to_ed = edgeid;
- newArc->multiplicity = 1;
- newArc->next = NULL;
- return newArc;
+ arcCounter++;
+ preARC *newArc;
+ newArc = ( preARC * ) getItem ( prearc_mem_manager );
+ newArc->to_ed = edgeid;
+ newArc->multiplicity = 1;
+ newArc->next = NULL;
+ return newArc;
}
-void output_arcGVZ ( char * outfile, boolean IsContig )
+void output_arcGVZ ( char *outfile, boolean IsContig )
{
- ARC * pArc;
- preARC * pPreArc;
- char name[256];
- FILE * fp;
- unsigned int i;
- sprintf ( name, "%s.arc.gvz", outfile );
- fp = ckopen ( name, "w" );
- fprintf ( fp, "digraph G{\n" );
- fprintf ( fp, "\tsize=\"512,512\";\n" );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( IsContig )
- {
- pPreArc = contig_array[i].arcs;
-
- while ( pPreArc )
- {
- fprintf ( fp, "\tC%d -> C%d[label =\"%d\"];\n", i, pPreArc->to_ed, pPreArc->multiplicity );
- pPreArc = pPreArc->next;
- }
- }
- else
- {
- pArc = edge_array[i].arcs;
-
- while ( pArc )
- {
- fprintf ( fp, "\tC%d -> C%d[label =\"%d\"];\n", i, pArc->to_ed, pArc->multiplicity );
- pArc = pArc->next;
- }
- }
- }
-
- fprintf ( fp, "}\n" );
- fclose ( fp );
+ ARC *pArc;
+ preARC *pPreArc;
+ char name[256];
+ FILE *fp;
+ unsigned int i;
+ sprintf ( name, "%s.arc.gvz", outfile );
+ fp = ckopen ( name, "w" );
+ fprintf ( fp, "digraph G{\n" );
+ fprintf ( fp, "\tsize=\"512,512\";\n" );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( IsContig )
+ {
+ pPreArc = contig_array[i].arcs;
+
+ while ( pPreArc )
+ {
+ fprintf ( fp, "\tC%d -> C%d[label =\"%d\"];\n", i, pPreArc->to_ed, pPreArc->multiplicity );
+ pPreArc = pPreArc->next;
+ }
+ }
+ else
+ {
+ pArc = edge_array[i].arcs;
+
+ while ( pArc )
+ {
+ fprintf ( fp, "\tC%d -> C%d[label =\"%d\"];\n", i, pArc->to_ed, pArc->multiplicity );
+ pArc = pArc->next;
+ }
+ }
+ }
+
+ fprintf ( fp, "}\n" );
+ fclose ( fp );
}
/**************** below this line all codes are about ARC ****************/
#define ARCBLOCKSIZE 100000
void createArcMemo ()
{
- if ( !arc_mem_manager )
- {
- arc_mem_manager = createMem_manager ( ARCBLOCKSIZE, sizeof ( ARC ) );
- }
- else
- {
- fprintf ( stderr, "Warning from createArcMemo: arc_mem_manager is a active pointer.\n" );
- }
+ if ( !arc_mem_manager )
+ {
+ arc_mem_manager = createMem_manager ( ARCBLOCKSIZE, sizeof ( ARC ) );
+ }
+ else
+ {
+ fprintf ( stderr, "Warning from createArcMemo: arc_mem_manager is a active pointer.\n" );
+ }
}
void destroyArcMem ()
{
- freeMem_manager ( arc_mem_manager );
- arc_mem_manager = NULL;
+ freeMem_manager ( arc_mem_manager );
+ arc_mem_manager = NULL;
}
-ARC * allocateArc ( unsigned int edgeid )
+ARC *allocateArc ( unsigned int edgeid )
{
- arcCounter++;
- ARC * newArc;
- newArc = ( ARC * ) getItem ( arc_mem_manager );
- newArc->to_ed = edgeid;
- newArc->multiplicity = 1;
- newArc->prev = NULL;
- newArc->next = NULL;
- return newArc;
+ arcCounter++;
+ ARC *newArc;
+ newArc = ( ARC * ) getItem ( arc_mem_manager );
+ newArc->to_ed = edgeid;
+ newArc->multiplicity = 1;
+ newArc->prev = NULL;
+ newArc->next = NULL;
+ return newArc;
}
-void dismissArc ( ARC * arc )
+void dismissArc ( ARC *arc )
{
- returnItem ( arc_mem_manager, arc );
+ returnItem ( arc_mem_manager, arc );
}
/***************** below this line all codes are about lookup table *****************/
void createArcLookupTable ()
{
- if ( !arcLookupTable )
- {
- arcLookupTable = ( ARC ** ) ckalloc ( ( 3 * num_ed + 1 ) * sizeof ( ARC * ) );
- }
+ if ( !arcLookupTable )
+ {
+ arcLookupTable = ( ARC ** ) ckalloc ( ( 3 * num_ed + 1 ) * sizeof ( ARC * ) );
+ }
}
void deleteArcLookupTable ()
{
- if ( arcLookupTable )
- {
- free ( ( void * ) arcLookupTable );
- arcLookupTable = NULL;
- }
+ if ( arcLookupTable )
+ {
+ free ( ( void * ) arcLookupTable );
+ arcLookupTable = NULL;
+ }
}
-void putArc2LookupTable ( unsigned int from_ed, ARC * arc )
+void putArc2LookupTable ( unsigned int from_ed, ARC *arc )
{
- if ( !arc || !arcLookupTable )
- {
- return;
- }
-
- unsigned int index = 2 * from_ed + arc->to_ed;
- arc->nextInLookupTable = arcLookupTable[index];
- arcLookupTable[index] = arc;
+ if ( !arc || !arcLookupTable )
+ {
+ return;
+ }
+
+ unsigned int index = 2 * from_ed + arc->to_ed;
+ arc->nextInLookupTable = arcLookupTable[index];
+ arcLookupTable[index] = arc;
}
-static ARC * getArcInLookupTable ( unsigned int from_ed, unsigned int to_ed )
+static ARC *getArcInLookupTable ( unsigned int from_ed, unsigned int to_ed )
{
- unsigned int index = 2 * from_ed + to_ed;
- ARC * ite_arc = arcLookupTable[index];
+ unsigned int index = 2 * from_ed + to_ed;
+ ARC *ite_arc = arcLookupTable[index];
- while ( ite_arc )
- {
- if ( ite_arc->to_ed == to_ed )
- {
- return ite_arc;
- }
+ while ( ite_arc )
+ {
+ if ( ite_arc->to_ed == to_ed )
+ {
+ return ite_arc;
+ }
- ite_arc = ite_arc->nextInLookupTable;
- }
+ ite_arc = ite_arc->nextInLookupTable;
+ }
- return NULL;
+ return NULL;
}
void removeArcInLookupTable ( unsigned int from_ed, unsigned int to_ed )
{
- unsigned int index = 2 * from_ed + to_ed;
- ARC * ite_arc = arcLookupTable[index];
- ARC * arc;
-
- if ( !ite_arc )
- {
- fprintf ( stderr, "RemoveArcInLookupTable: not found A.\n" );
- return;
- }
-
- if ( ite_arc->to_ed == to_ed )
- {
- arcLookupTable[index] = ite_arc->nextInLookupTable;
- return;
- }
-
- while ( ite_arc->nextInLookupTable && ite_arc->nextInLookupTable->to_ed != to_ed )
- {
- ite_arc = ite_arc->nextInLookupTable;
- }
-
- if ( ite_arc->nextInLookupTable )
- {
- arc = ite_arc->nextInLookupTable;
- ite_arc->nextInLookupTable = arc->nextInLookupTable;
- return;
- }
-
- fprintf ( stderr, "RemoveArcInLookupTable: not found B.\n" );
- return;
+ unsigned int index = 2 * from_ed + to_ed;
+ ARC *ite_arc = arcLookupTable[index];
+ ARC *arc;
+
+ if ( !ite_arc )
+ {
+ fprintf ( stderr, "RemoveArcInLookupTable: not found A.\n" );
+ return;
+ }
+
+ if ( ite_arc->to_ed == to_ed )
+ {
+ arcLookupTable[index] = ite_arc->nextInLookupTable;
+ return;
+ }
+
+ while ( ite_arc->nextInLookupTable && ite_arc->nextInLookupTable->to_ed != to_ed )
+ {
+ ite_arc = ite_arc->nextInLookupTable;
+ }
+
+ if ( ite_arc->nextInLookupTable )
+ {
+ arc = ite_arc->nextInLookupTable;
+ ite_arc->nextInLookupTable = arc->nextInLookupTable;
+ return;
+ }
+
+ fprintf ( stderr, "RemoveArcInLookupTable: not found B.\n" );
+ return;
}
void recordArcsInLookupTable ()
{
- unsigned int i;
- ARC * ite_arc;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- ite_arc = edge_array[i].arcs;
-
- while ( ite_arc )
- {
- putArc2LookupTable ( i, ite_arc );
- ite_arc = ite_arc->next;
- }
- }
+ unsigned int i;
+ ARC *ite_arc;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ ite_arc = edge_array[i].arcs;
+
+ while ( ite_arc )
+ {
+ putArc2LookupTable ( i, ite_arc );
+ ite_arc = ite_arc->next;
+ }
+ }
}
/*************************************************
@@ -268,29 +268,29 @@ Output:
Return:
The arc between the edge "from_ed" and the edge "to_ed".
*************************************************/
-ARC * getArcBetween ( unsigned int from_ed, unsigned int to_ed )
+ARC *getArcBetween ( unsigned int from_ed, unsigned int to_ed )
{
- ARC * parc;
+ ARC *parc;
- if ( arcLookupTable )
- {
- parc = getArcInLookupTable ( from_ed, to_ed );
- return parc;
- }
+ if ( arcLookupTable )
+ {
+ parc = getArcInLookupTable ( from_ed, to_ed );
+ return parc;
+ }
- parc = edge_array[from_ed].arcs;
+ parc = edge_array[from_ed].arcs;
- while ( parc )
- {
- if ( parc->to_ed == to_ed )
- {
- return parc;
- }
+ while ( parc )
+ {
+ if ( parc->to_ed == to_ed )
+ {
+ return parc;
+ }
- parc = parc->next;
- }
+ parc = parc->next;
+ }
- return parc;
+ return parc;
}
/*************************************************
@@ -306,19 +306,21 @@ Output:
Return:
The number of the valid arc.
*************************************************/
-int validArcCount ( preARC * arc, int cutoff )
+int validArcCount ( preARC *arc, int cutoff )
{
- int arc_num = 0;
+ int arc_num = 0;
- while ( arc )
- {
- if ( arc->multiplicity >= cutoff )
- { ++arc_num; }
+ while ( arc )
+ {
+ if ( arc->multiplicity >= cutoff )
+ {
+ ++arc_num;
+ }
- arc = arc->next;
- }
+ arc = arc->next;
+ }
- return arc_num;
+ return arc_num;
}
/*************************************************
@@ -333,18 +335,20 @@ Output:
Return:
The maximum weight.
*************************************************/
-unsigned int maxArcWeight ( preARC * arc )
+unsigned int maxArcWeight ( preARC *arc )
{
- unsigned int max = 0;
+ unsigned int max = 0;
- while ( arc )
- {
- if ( arc->multiplicity > max )
- { max = arc->multiplicity; }
+ while ( arc )
+ {
+ if ( arc->multiplicity > max )
+ {
+ max = arc->multiplicity;
+ }
- arc = arc->next;
- }
+ arc = arc->next;
+ }
- return max;
+ return max;
}
diff --git a/standardPregraph/attachPEinfo.c b/standardPregraph/attachPEinfo.c
index 77105d5..36fa353 100644
--- a/standardPregraph/attachPEinfo.c
+++ b/standardPregraph/attachPEinfo.c
@@ -1,7 +1,7 @@
/*
* attachPEinfo.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,7 +29,7 @@
#include "zlib.h"
#define CNBLOCKSIZE 10000
-static STACK * isStack;
+static STACK *isStack;
static int ignorePE1, ignorePE2, ignorePE3, static_flag;
static int onsameCtgPE;
static unsigned long long peSUM;
@@ -38,26 +38,26 @@ static boolean staticF;
static int existCounter;
-static int calcuIS ( STACK * intStack );
+static int calcuIS ( STACK *intStack );
-static int cmp_pe ( const void * a, const void * b )
+static int cmp_pe ( const void *a, const void *b )
{
- PE_INFO * A, *B;
- A = ( PE_INFO * ) a;
- B = ( PE_INFO * ) b;
-
- if ( A->rank > B->rank )
- {
- return 1;
- }
- else if ( A->rank == B->rank )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ PE_INFO *A, *B;
+ A = ( PE_INFO * ) a;
+ B = ( PE_INFO * ) b;
+
+ if ( A->rank > B->rank )
+ {
+ return 1;
+ }
+ else if ( A->rank == B->rank )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
/*************************************************
@@ -72,111 +72,111 @@ Output:
Return:
None.
*************************************************/
-void loadPEgrads ( char * infile )
+void loadPEgrads ( char *infile )
{
- FILE * fp;
- char name[256], line[1024];
- int i;
- boolean rankSet = 1;
- sprintf ( name, "%s.peGrads", infile );
- fp = fopen ( name, "r" );
-
- if ( !fp )
- {
- fprintf ( stderr, "Can not open file %s.\n", name );
- gradsCounter = 0;
- return;
- }
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'g' )
- {
- sscanf ( line + 10, "%d %lld %d", &gradsCounter, &n_solexa, &maxReadLen );
- fprintf ( stderr, "There are %d grad(s), %lld read(s), max read len %d.\n", gradsCounter, n_solexa, maxReadLen );
- break;
- }
- }
-
- alloc_pe_mem ( gradsCounter );
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- fgets ( line, sizeof ( line ), fp );
- pes[i].rank = 0;
- sscanf ( line, "%d %lld %d %d", & ( pes[i].insertS ), & ( pes[i].PE_bound ), & ( pes[i].rank ), & ( pes[i].pair_num_cut ) );
-
- if ( pes[i].rank < 1 )
- {
- rankSet = 0;
- }
- }
-
- fclose ( fp );
-
- if ( rankSet )
- {
- qsort ( &pes[0], gradsCounter, sizeof ( PE_INFO ), cmp_pe );
- return;
- }
-
- int lastRank = 0;
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- if ( i == 0 )
- {
- pes[i].rank = ++lastRank;
- }
- else if ( pes[i].insertS < 300 )
- {
- pes[i].rank = lastRank;
- }
- else if ( pes[i].insertS < 800 )
- {
- if ( pes[i - 1].insertS < 300 )
- {
- pes[i].rank = ++lastRank;
- }
- else
- {
- pes[i].rank = lastRank;
- }
- }
- else if ( pes[i].insertS < 3000 )
- {
- if ( pes[i - 1].insertS < 800 )
- {
- pes[i].rank = ++lastRank;
- }
- else
- {
- pes[i].rank = lastRank;
- }
- }
- else if ( pes[i].insertS < 7000 )
- {
- if ( pes[i - 1].insertS < 3000 )
- {
- pes[i].rank = ++lastRank;
- }
- else
- {
- pes[i].rank = lastRank;
- }
- }
- else
- {
- if ( pes[i - 1].insertS < 7000 )
- {
- pes[i].rank = ++lastRank;
- }
- else
- {
- pes[i].rank = lastRank;
- }
- }
- }
+ FILE *fp;
+ char name[256], line[1024];
+ int i;
+ boolean rankSet = 1;
+ sprintf ( name, "%s.peGrads", infile );
+ fp = fopen ( name, "r" );
+
+ if ( !fp )
+ {
+ fprintf ( stderr, "Can not open file %s.\n", name );
+ gradsCounter = 0;
+ return;
+ }
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'g' )
+ {
+ sscanf ( line + 10, "%d %lld %d", &gradsCounter, &n_solexa, &maxReadLen );
+ fprintf ( stderr, "There are %d grad(s), %lld read(s), max read len %d.\n", gradsCounter, n_solexa, maxReadLen );
+ break;
+ }
+ }
+
+ alloc_pe_mem ( gradsCounter );
+
+ for ( i = 0; i < gradsCounter; i++ )
+ {
+ fgets ( line, sizeof ( line ), fp );
+ pes[i].rank = 0;
+ sscanf ( line, "%d %lld %d %d", & ( pes[i].insertS ), & ( pes[i].PE_bound ), & ( pes[i].rank ), & ( pes[i].pair_num_cut ) );
+
+ if ( pes[i].rank < 1 )
+ {
+ rankSet = 0;
+ }
+ }
+
+ fclose ( fp );
+
+ if ( rankSet )
+ {
+ qsort ( &pes[0], gradsCounter, sizeof ( PE_INFO ), cmp_pe );
+ return;
+ }
+
+ int lastRank = 0;
+
+ for ( i = 0; i < gradsCounter; i++ )
+ {
+ if ( i == 0 )
+ {
+ pes[i].rank = ++lastRank;
+ }
+ else if ( pes[i].insertS < 300 )
+ {
+ pes[i].rank = lastRank;
+ }
+ else if ( pes[i].insertS < 800 )
+ {
+ if ( pes[i - 1].insertS < 300 )
+ {
+ pes[i].rank = ++lastRank;
+ }
+ else
+ {
+ pes[i].rank = lastRank;
+ }
+ }
+ else if ( pes[i].insertS < 3000 )
+ {
+ if ( pes[i - 1].insertS < 800 )
+ {
+ pes[i].rank = ++lastRank;
+ }
+ else
+ {
+ pes[i].rank = lastRank;
+ }
+ }
+ else if ( pes[i].insertS < 7000 )
+ {
+ if ( pes[i - 1].insertS < 3000 )
+ {
+ pes[i].rank = ++lastRank;
+ }
+ else
+ {
+ pes[i].rank = lastRank;
+ }
+ }
+ else
+ {
+ if ( pes[i - 1].insertS < 7000 )
+ {
+ pes[i].rank = ++lastRank;
+ }
+ else
+ {
+ pes[i].rank = lastRank;
+ }
+ }
+ }
}
/*************************************************
@@ -195,103 +195,103 @@ Output:
Return:
NULL if failed adding, otherwise the pointer to new or updated connection.
*************************************************/
-CONNECT * add1Connect ( unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit )
+CONNECT *add1Connect ( unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit )
{
- if ( e1 == e2 || e1 == getTwinCtg ( e2 ) )
- {
- return NULL;
- }
-
- CONNECT * connect = NULL, *bal_connect = NULL;
- long long sum;
-
- if ( weight > 255 )
- {
- weight = 255;
- }
-
- connect = getCntBetween ( e1, e2 );
- bal_connect = getCntBetween ( e2, e1 );
-
- if ( connect )
- {
- if ( !weight )
- {
- return connect;
- }
-
- existCounter++;
-
- if ( !inherit )
- {
- sum = connect->weightNotInherit * connect->gapLen + gap * weight;
- connect->gapLen = sum / ( connect->weightNotInherit + weight );
-
- if ( connect->weightNotInherit + weight <= 255 )
- {
- connect->weightNotInherit += weight;
- }
- else if ( connect->weightNotInherit < 255 )
- {
- connect->weightNotInherit = 255;
- }
- }
- else
- {
- sum = connect->weight * connect->gapLen + gap * weight;
- connect->gapLen = sum / ( connect->weight + weight );
-
- if ( !connect->inherit )
- {
- connect->maxSingleWeight = connect->weightNotInherit;
- }
-
- connect->inherit = 1;
- connect->maxSingleWeight = connect->maxSingleWeight > weight ? connect->maxSingleWeight : weight;
- }
-
- if ( connect->weight + weight <= 255 )
- {
- connect->weight += weight;
- }
- else if ( connect->weight < 255 )
- {
- connect->weight = 255;
- }
- }
- else
- {
- newCntCounter++;
- connect = allocateCN ( e2, gap );
-
- if ( cntLookupTable )
- {
- putCnt2LookupTable ( e1, connect );
- }
-
- connect->weight = weight;
-
- if ( contig_array[e1].mask || contig_array[e2].mask )
- {
- connect->mask = 1;
- }
-
- connect->next = contig_array[e1].downwardConnect;
- contig_array[e1].downwardConnect = connect;
-
- if ( !inherit )
- {
- connect->weightNotInherit = weight;
- }
- else
- {
- connect->weightNotInherit = 0;
- connect->inherit = 1;
- connect->maxSingleWeight = weight;
- }
- }
-
- return connect;
+ if ( e1 == e2 || e1 == getTwinCtg ( e2 ) )
+ {
+ return NULL;
+ }
+
+ CONNECT *connect = NULL, *bal_connect = NULL;
+ long long sum;
+
+ if ( weight > 255 )
+ {
+ weight = 255;
+ }
+
+ connect = getCntBetween ( e1, e2 );
+ bal_connect = getCntBetween ( e2, e1 );
+
+ if ( connect )
+ {
+ if ( !weight )
+ {
+ return connect;
+ }
+
+ existCounter++;
+
+ if ( !inherit )
+ {
+ sum = connect->weightNotInherit * connect->gapLen + gap * weight;
+ connect->gapLen = sum / ( connect->weightNotInherit + weight );
+
+ if ( connect->weightNotInherit + weight <= 255 )
+ {
+ connect->weightNotInherit += weight;
+ }
+ else if ( connect->weightNotInherit < 255 )
+ {
+ connect->weightNotInherit = 255;
+ }
+ }
+ else
+ {
+ sum = connect->weight * connect->gapLen + gap * weight;
+ connect->gapLen = sum / ( connect->weight + weight );
+
+ if ( !connect->inherit )
+ {
+ connect->maxSingleWeight = connect->weightNotInherit;
+ }
+
+ connect->inherit = 1;
+ connect->maxSingleWeight = connect->maxSingleWeight > weight ? connect->maxSingleWeight : weight;
+ }
+
+ if ( connect->weight + weight <= 255 )
+ {
+ connect->weight += weight;
+ }
+ else if ( connect->weight < 255 )
+ {
+ connect->weight = 255;
+ }
+ }
+ else
+ {
+ newCntCounter++;
+ connect = allocateCN ( e2, gap );
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( e1, connect );
+ }
+
+ connect->weight = weight;
+
+ if ( contig_array[e1].mask || contig_array[e2].mask )
+ {
+ connect->mask = 1;
+ }
+
+ connect->next = contig_array[e1].downwardConnect;
+ contig_array[e1].downwardConnect = connect;
+
+ if ( !inherit )
+ {
+ connect->weightNotInherit = weight;
+ }
+ else
+ {
+ connect->weightNotInherit = 0;
+ connect->inherit = 1;
+ connect->maxSingleWeight = weight;
+ }
+ }
+
+ return connect;
}
/*************************************************
@@ -316,54 +316,54 @@ CONNECT * add1Connect ( unsigned int e1, unsigned int e2, int gap, int weight, b
*************************************************/
int attach1PE ( unsigned int e1, int pre_pos, unsigned int bal_e2, int pos, int insert_size )
{
- int gap, realpeSize;
- unsigned int bal_e1, e2;
-
- if ( e1 == bal_e2 )
- {
- ignorePE1++;
- return -1; //orientation wrong
- }
-
- bal_e1 = getTwinCtg ( e1 );
- e2 = getTwinCtg ( bal_e2 );
-
- if ( e1 == e2 )
- {
- realpeSize = contig_array[e1].length + overlaplen - pre_pos - pos;
-
- if ( realpeSize > 0 )
- {
- peSUM += realpeSize;
- onsameCtgPE++;
-
- if ( ( int ) contig_array[e1].length > insert_size )
- {
- int * item = ( int * ) stackPush ( isStack );
- ( *item ) = realpeSize;
- }
- }
-
- return 2;
- }
-
- gap = insert_size - overlaplen + pre_pos + pos - contig_array[e1].length - contig_array[e2].length;
-
- if ( gap < - ( insert_size / 10 ) )
- {
- ignorePE2++;
- return 0;
- }
-
- if ( gap > insert_size )
- {
- ignorePE3++;
- return 0;
- }
-
- add1Connect ( e1, e2, gap, 1, 0 );
- add1Connect ( bal_e2, bal_e1, gap, 1, 0 );
- return 1;
+ int gap, realpeSize;
+ unsigned int bal_e1, e2;
+
+ if ( e1 == bal_e2 )
+ {
+ ignorePE1++;
+ return -1; //orientation wrong
+ }
+
+ bal_e1 = getTwinCtg ( e1 );
+ e2 = getTwinCtg ( bal_e2 );
+
+ if ( e1 == e2 )
+ {
+ realpeSize = contig_array[e1].length + overlaplen - pre_pos - pos;
+
+ if ( realpeSize > 0 )
+ {
+ peSUM += realpeSize;
+ onsameCtgPE++;
+
+ if ( ( int ) contig_array[e1].length > insert_size )
+ {
+ int *item = ( int * ) stackPush ( isStack );
+ ( *item ) = realpeSize;
+ }
+ }
+
+ return 2;
+ }
+
+ gap = insert_size - overlaplen + pre_pos + pos - contig_array[e1].length - contig_array[e2].length;
+
+ if ( gap < - ( insert_size / 10 ) )
+ {
+ ignorePE2++;
+ return 0;
+ }
+
+ if ( gap > insert_size )
+ {
+ ignorePE3++;
+ return 0;
+ }
+
+ add1Connect ( e1, e2, gap, 1, 0 );
+ add1Connect ( bal_e2, bal_e1, gap, 1, 0 );
+ return 1;
}
/*************************************************
@@ -381,100 +381,100 @@ int attach1PE ( unsigned int e1, int pre_pos, unsigned int bal_e2, int pos, int
Return:
Loaded alignment record number.
*************************************************/
-int connectByPE_grad ( FILE * fp, int peGrad, char * line )
+int connectByPE_grad ( FILE *fp, int peGrad, char *line )
{
- long long pre_readno, readno, minno, maxno;
- int pre_pos, pos, flag, PE, count = 0, Total_PE = 0;
- unsigned int pre_contigno, contigno, newIndex;
-
- if ( peGrad < 0 || peGrad > gradsCounter )
- {
- fprintf ( stderr, "Specified pe grad is out of bound.\n" );
- return 0;
- }
-
- maxno = pes[peGrad].PE_bound;
-
- if ( peGrad == 0 )
- {
- minno = 0;
- }
- else
- {
- minno = pes[peGrad - 1].PE_bound;
- }
-
- onsameCtgPE = peSUM = 0;
- PE = pes[peGrad].insertS;
-
- if ( strlen ( line ) )
- {
- sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
-
- if ( pre_readno <= minno )
- {
- pre_readno = -1;
- }
- }
- else
- {
- pre_readno = -1;
- }
-
- ignorePE1 = ignorePE2 = ignorePE3 = 0;
- static_flag = 1;
- isStack = ( STACK * ) createStack ( CNBLOCKSIZE, sizeof ( int ) );
-
- while ( fgets ( line, lineLen, fp ) != NULL )
- {
- sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
-
- if ( readno > maxno )
- {
- break;
- }
-
- if ( readno <= minno )
- {
- continue;
- }
-
- newIndex = index_array[contigno];
-
- if ( isSameAsTwin ( newIndex ) )
- {
- continue;
- }
-
- if ( PE && ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) ) // they are a pair of reads
- {
- Total_PE++;
- flag = attach1PE ( pre_contigno, pre_pos, newIndex, pos, PE );
-
- if ( flag == 1 )
- {
- count++;
- }
- }
-
- pre_readno = readno;
- pre_contigno = newIndex;
- pre_pos = pos;
- }
-
- fprintf ( stderr, "For insert size: %d\n", PE );
- fprintf ( stderr, " Total PE links %d\n", Total_PE );
- fprintf ( stderr, " Normal PE links on same contig %d\n", onsameCtgPE );
- fprintf ( stderr, " Abnormal PE links on same contig %d\n", ignorePE1 );
- fprintf ( stderr, " PE links of minus distance %d\n", ignorePE2 );
- fprintf ( stderr, " PE links of plus distance %d\n", ignorePE3 );
- fprintf ( stderr, " Correct PE links %d\n", count );
- fprintf ( stderr, " Accumulated connections %d\n", newCntCounter );
- fprintf ( stderr, "Use contigs longer than %d to estimate insert size: \n", PE );
- fprintf ( stderr, " PE links %d\n", isStack->item_c );
- calcuIS ( isStack );
- freeStack ( isStack );
- return count;
+ long long pre_readno, readno, minno, maxno;
+ int pre_pos, pos, flag, PE, count = 0, Total_PE = 0;
+ unsigned int pre_contigno, contigno, newIndex;
+
+ if ( peGrad < 0 || peGrad > gradsCounter )
+ {
+ fprintf ( stderr, "Specified pe grad is out of bound.\n" );
+ return 0;
+ }
+
+ maxno = pes[peGrad].PE_bound;
+
+ if ( peGrad == 0 )
+ {
+ minno = 0;
+ }
+ else
+ {
+ minno = pes[peGrad - 1].PE_bound;
+ }
+
+ onsameCtgPE = peSUM = 0;
+ PE = pes[peGrad].insertS;
+
+ if ( strlen ( line ) )
+ {
+ sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+
+ if ( pre_readno <= minno )
+ {
+ pre_readno = -1;
+ }
+ }
+ else
+ {
+ pre_readno = -1;
+ }
+
+ ignorePE1 = ignorePE2 = ignorePE3 = 0;
+ static_flag = 1;
+ isStack = ( STACK * ) createStack ( CNBLOCKSIZE, sizeof ( int ) );
+
+ while ( fgets ( line, lineLen, fp ) != NULL )
+ {
+ sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+
+ if ( readno > maxno )
+ {
+ break;
+ }
+
+ if ( readno <= minno )
+ {
+ continue;
+ }
+
+ newIndex = index_array[contigno];
+
+ if ( isSameAsTwin ( newIndex ) )
+ {
+ continue;
+ }
+
+ if ( PE && ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) ) // they are a pair of reads
+ {
+ Total_PE++;
+ flag = attach1PE ( pre_contigno, pre_pos, newIndex, pos, PE );
+
+ if ( flag == 1 )
+ {
+ count++;
+ }
+ }
+
+ pre_readno = readno;
+ pre_contigno = newIndex;
+ pre_pos = pos;
+ }
+
+ fprintf ( stderr, "For insert size: %d\n", PE );
+ fprintf ( stderr, " Total PE links %d\n", Total_PE );
+ fprintf ( stderr, " Normal PE links on same contig %d\n", onsameCtgPE );
+ fprintf ( stderr, " Abnormal PE links on same contig %d\n", ignorePE1 );
+ fprintf ( stderr, " PE links of minus distance %d\n", ignorePE2 );
+ fprintf ( stderr, " PE links of plus distance %d\n", ignorePE3 );
+ fprintf ( stderr, " Correct PE links %d\n", count );
+ fprintf ( stderr, " Accumulated connections %d\n", newCntCounter );
+ fprintf ( stderr, "Use contigs longer than %d to estimate insert size: \n", PE );
+ fprintf ( stderr, " PE links %d\n", isStack->item_c );
+ calcuIS ( isStack );
+ freeStack ( isStack );
+ return count;
}
/*************************************************
@@ -492,100 +492,100 @@ int connectByPE_grad ( FILE * fp, int peGrad, char * line )
Return:
Loaded alignment record number.
*************************************************/
-int connectByPE_grad_gz ( gzFile * fp, int peGrad, char * line )
+int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line )
{
- long long pre_readno, readno, minno, maxno;
- int pre_pos, pos, flag, PE, count = 0, Total_PE = 0;
- unsigned int pre_contigno, contigno, newIndex;
-
- if ( peGrad < 0 || peGrad > gradsCounter )
- {
- fprintf ( stderr, "Specified pe grad is out of bound.\n" );
- return 0;
- }
-
- maxno = pes[peGrad].PE_bound;
-
- if ( peGrad == 0 )
- {
- minno = 0;
- }
- else
- {
- minno = pes[peGrad - 1].PE_bound;
- }
-
- onsameCtgPE = peSUM = 0;
- PE = pes[peGrad].insertS;
-
- if ( strlen ( line ) )
- {
- sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
-
- if ( pre_readno <= minno )
- {
- pre_readno = -1;
- }
- }
- else
- {
- pre_readno = -1;
- }
-
- ignorePE1 = ignorePE2 = ignorePE3 = 0;
- static_flag = 1;
- isStack = ( STACK * ) createStack ( CNBLOCKSIZE, sizeof ( int ) );
-
- while ( gzgets ( fp, line, lineLen ) != NULL )
- {
- sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
-
- if ( readno > maxno )
- {
- break;
- }
-
- if ( readno <= minno )
- {
- continue;
- }
-
- newIndex = index_array[contigno];
-
- if ( isSameAsTwin ( newIndex ) )
- {
- continue;
- }
-
- if ( PE && ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) ) // they are a pair of reads
- {
- Total_PE++;
- flag = attach1PE ( pre_contigno, pre_pos, newIndex, pos, PE );
-
- if ( flag == 1 )
- {
- count++;
- }
- }
-
- pre_readno = readno;
- pre_contigno = newIndex;
- pre_pos = pos;
- }
-
- fprintf ( stderr, "For insert size: %d\n", PE );
- fprintf ( stderr, " Total PE links %d\n", Total_PE );
- fprintf ( stderr, " Normal PE links on same contig %d\n", onsameCtgPE );
- fprintf ( stderr, " Incorrect oriented PE links %d\n", ignorePE1 );
- fprintf ( stderr, " PE links of too small insert size %d\n", ignorePE2 );
- fprintf ( stderr, " PE links of too large insert size %d\n", ignorePE3 );
- fprintf ( stderr, " Correct PE links %d\n", count );
- fprintf ( stderr, " Accumulated connections %d\n", newCntCounter );
- fprintf ( stderr, "Use contigs longer than %d to estimate insert size: \n", PE );
- fprintf ( stderr, " PE links %d\n", isStack->item_c );
- calcuIS ( isStack );
- freeStack ( isStack );
- return count;
+ long long pre_readno, readno, minno, maxno;
+ int pre_pos, pos, flag, PE, count = 0, Total_PE = 0;
+ unsigned int pre_contigno, contigno, newIndex;
+
+ if ( peGrad < 0 || peGrad > gradsCounter )
+ {
+ fprintf ( stderr, "Specified pe grad is out of bound.\n" );
+ return 0;
+ }
+
+ maxno = pes[peGrad].PE_bound;
+
+ if ( peGrad == 0 )
+ {
+ minno = 0;
+ }
+ else
+ {
+ minno = pes[peGrad - 1].PE_bound;
+ }
+
+ onsameCtgPE = peSUM = 0;
+ PE = pes[peGrad].insertS;
+
+ if ( strlen ( line ) )
+ {
+ sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+
+ if ( pre_readno <= minno )
+ {
+ pre_readno = -1;
+ }
+ }
+ else
+ {
+ pre_readno = -1;
+ }
+
+ ignorePE1 = ignorePE2 = ignorePE3 = 0;
+ static_flag = 1;
+ isStack = ( STACK * ) createStack ( CNBLOCKSIZE, sizeof ( int ) );
+
+ while ( gzgets ( fp, line, lineLen ) != NULL )
+ {
+ sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+
+ if ( readno > maxno )
+ {
+ break;
+ }
+
+ if ( readno <= minno )
+ {
+ continue;
+ }
+
+ newIndex = index_array[contigno];
+
+ if ( isSameAsTwin ( newIndex ) )
+ {
+ continue;
+ }
+
+ if ( PE && ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) ) // they are a pair of reads
+ {
+ Total_PE++;
+ flag = attach1PE ( pre_contigno, pre_pos, newIndex, pos, PE );
+
+ if ( flag == 1 )
+ {
+ count++;
+ }
+ }
+
+ pre_readno = readno;
+ pre_contigno = newIndex;
+ pre_pos = pos;
+ }
+
+ fprintf ( stderr, "For insert size: %d\n", PE );
+ fprintf ( stderr, " Total PE links %d\n", Total_PE );
+ fprintf ( stderr, " Normal PE links on same contig %d\n", onsameCtgPE );
+ fprintf ( stderr, " Incorrect oriented PE links %d\n", ignorePE1 );
+ fprintf ( stderr, " PE links of too small insert size %d\n", ignorePE2 );
+ fprintf ( stderr, " PE links of too large insert size %d\n", ignorePE3 );
+ fprintf ( stderr, " Correct PE links %d\n", count );
+ fprintf ( stderr, " Accumulated connections %d\n", newCntCounter );
+ fprintf ( stderr, "Use contigs longer than %d to estimate insert size: \n", PE );
+ fprintf ( stderr, " PE links %d\n", isStack->item_c );
+ calcuIS ( isStack );
+ freeStack ( isStack );
+ return count;
}
/*************************************************
@@ -601,78 +601,84 @@ int connectByPE_grad_gz ( gzFile * fp, int peGrad, char * line )
Return:
Calculated insert size.
*************************************************/
-static int calcuIS ( STACK * intStack )
+static int calcuIS ( STACK *intStack )
{
- long long sum = 0;
- int avg = 0;
- int * item;
- int num = intStack->item_c;
-
- if ( num < 100 )
- {
- fprintf ( stderr, "Too few PE links.\n" );
- return avg;
- }
-
- stackBackup ( intStack );
-
- while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
- {
- sum += *item;
- }
-
- stackRecover ( intStack );
- num = intStack->item_c;
- avg = sum / num;
- sum = 0;
- stackBackup ( intStack );
-
- while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
- {
- sum += ( ( long long ) * item - avg ) * ( ( long long ) * item - avg );
- }
-
- int SD = sqrt ( sum / ( num - 1 ) );
-
- if ( SD == 0 )
- {
- fprintf ( stderr, " Average insert size %d\n SD %d\n", avg, SD );
- return avg;
- }
-
- stackRecover ( intStack );
- sum = num = 0;
-
- while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
- if ( abs ( *item - avg ) < 3 * SD )
- {
- sum += *item;
- num++;
- }
-
- if ( num == 0 ) { avg = 0; }
- else { avg = sum / num; }
-
- fprintf ( stderr, " Average insert size %d\n SD %d\n", avg, SD );
- return avg;
+ long long sum = 0;
+ int avg = 0;
+ int *item;
+ int num = intStack->item_c;
+
+ if ( num < 100 )
+ {
+ fprintf ( stderr, "Too few PE links.\n" );
+ return avg;
+ }
+
+ stackBackup ( intStack );
+
+ while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
+ {
+ sum += *item;
+ }
+
+ stackRecover ( intStack );
+ num = intStack->item_c;
+ avg = sum / num;
+ sum = 0;
+ stackBackup ( intStack );
+
+ while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
+ {
+ sum += ( ( long long ) * item - avg ) * ( ( long long ) * item - avg );
+ }
+
+ int SD = sqrt ( sum / ( num - 1 ) );
+
+ if ( SD == 0 )
+ {
+ fprintf ( stderr, " Average insert size %d\n SD %d\n", avg, SD );
+ return avg;
+ }
+
+ stackRecover ( intStack );
+ sum = num = 0;
+
+ while ( ( item = ( int * ) stackPop ( intStack ) ) != NULL )
+ if ( abs ( *item - avg ) < 3 * SD )
+ {
+ sum += *item;
+ num++;
+ }
+
+ if ( num == 0 )
+ {
+ avg = 0;
+ }
+ else
+ {
+ avg = sum / num;
+ }
+
+ fprintf ( stderr, " Average insert size %d\n SD %d\n", avg, SD );
+ return avg;
}
unsigned int getTwinCtg ( unsigned int ctg )
{
- return ctg + contig_array[ctg].bal_edge - 1;
+ return ctg + contig_array[ctg].bal_edge - 1;
}
boolean isSmallerThanTwin ( unsigned int ctg )
{
- return contig_array[ctg].bal_edge > 1;
+ return contig_array[ctg].bal_edge > 1;
}
boolean isLargerThanTwin ( unsigned int ctg )
{
- return contig_array[ctg].bal_edge < 1;
+ return contig_array[ctg].bal_edge < 1;
}
boolean isSameAsTwin ( unsigned int ctg )
{
- return contig_array[ctg].bal_edge == 1;
+ return contig_array[ctg].bal_edge == 1;
}
diff --git a/standardPregraph/bubble.c b/standardPregraph/bubble.c
index dd83a4c..56f0e97 100644
--- a/standardPregraph/bubble.c
+++ b/standardPregraph/bubble.c
@@ -1,7 +1,7 @@
/*
* bubble.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -41,21 +41,21 @@ static int MAXNODELENGTH; // the limit for the edge in the path
static int DIFF; // the mininum for the difference between the paths
static unsigned int outNodeArray[MAXCONNECTION]; //
-static ARC * outArcArray[MAXCONNECTION];
+static ARC *outArcArray[MAXCONNECTION];
static boolean HasChanged; // whether reset the arc
static const int INDEL = 0;
static const int SIM[4][4] = // the score matrix of comparison
{
- {1, 0, 0, 0},
- {0, 1, 0, 0},
- {0, 0, 1, 0},
- {0, 0, 0, 1}
+ {1, 0, 0, 0},
+ {0, 1, 0, 0},
+ {0, 0, 1, 0},
+ {0, 0, 0, 1}
};
//static variables
-static READINTERVAL * fastPath; // used to record the ordered edges, which is the saved path
-static READINTERVAL * slowPath; // used to record the ordered edges, which is the merged path
+static READINTERVAL *fastPath; // used to record the ordered edges, which is the saved path
+static READINTERVAL *slowPath; // used to record the ordered edges, which is the merged path
static char fastSequence[MAXREADLENGTH]; // used to record the sequence of the fast path
static char slowSequence[MAXREADLENGTH]; // used to record the sequence of the slow path
@@ -63,18 +63,18 @@ static char slowSequence[MAXREADLENGTH]; // used to record the sequence o
static int fastSeqLength; // the length of the sequence of the fast path
static int slowSeqLength; // the length of the sequence of the slow path
-static Time * times; // record the weight from the upstream edge to the current edge and used to decide which upstream edge is better
-static unsigned int * previous; // record the upstream edge
+static Time *times; // record the weight from the upstream edge to the current edge and used to decide which upstream edge is better
+static unsigned int *previous; // record the upstream edge
static unsigned int expCounter;
-static unsigned int * expanded;
+static unsigned int *expanded;
static double cutoff; // the mini difference between the paths
static int Fmatrix[MAXREADLENGTH + 1][MAXREADLENGTH + 1]; //the score matrix of comparing the paths
static int slowToFastMapping[MAXREADLENGTH + 1]; // the edge in the slow path map to the fast path
static int fastToSlowMapping[MAXREADLENGTH + 1]; // the edge in the fast path map to the slow path
-static DFibHeapNode ** dheapNodes;
-static DFibHeap * dheap;
+static DFibHeapNode **dheapNodes;
+static DFibHeap *dheap;
static unsigned int activeNode;
@@ -82,7 +82,7 @@ static unsigned int activeNode;
static unsigned int startingNode;
static int progress;
-static unsigned int * eligibleStartingPoints;
+static unsigned int *eligibleStartingPoints;
// DEBUG
static long long caseA, caseB, caseC, caseD, caseE;
@@ -121,57 +121,57 @@ static void output_contig1(int id, EDGE *edge)
}
printf("\n");
}*/
-static void output_seq ( char * seq, int length, FILE * fp, unsigned int from_vt, unsigned int dest )
+static void output_seq ( char *seq, int length, FILE *fp, unsigned int from_vt, unsigned int dest )
{
- int i;
- Kmer kmer;
- kmer = vt_array[from_vt].kmer;
- printKmerSeq ( fp, kmer );
- fprintf ( fp, " " );
-
- for ( i = 0; i < length; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) seq[i] ) );
- }
-
- if ( edge_array[dest].seq )
- {
- fprintf ( fp, " %c\n", int2base ( ( int ) getCharInTightString ( edge_array[dest].seq, 0 ) ) );
- }
- else
- {
- fprintf ( fp, " N\n" );
- }
+ int i;
+ Kmer kmer;
+ kmer = vt_array[from_vt].kmer;
+ printKmerSeq ( fp, kmer );
+ fprintf ( fp, " " );
+
+ for ( i = 0; i < length; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) seq[i] ) );
+ }
+
+ if ( edge_array[dest].seq )
+ {
+ fprintf ( fp, " %c\n", int2base ( ( int ) getCharInTightString ( edge_array[dest].seq, 0 ) ) );
+ }
+ else
+ {
+ fprintf ( fp, " N\n" );
+ }
}
-static void print_path ( FILE * fp )
+static void print_path ( FILE *fp )
{
- READINTERVAL * marker;
- marker = fastPath->nextInRead;
+ READINTERVAL *marker;
+ marker = fastPath->nextInRead;
- while ( marker->nextInRead )
- {
- fprintf ( fp, "%u ", marker->edgeid );
- marker = marker->nextInRead;
- }
+ while ( marker->nextInRead )
+ {
+ fprintf ( fp, "%u ", marker->edgeid );
+ marker = marker->nextInRead;
+ }
- fprintf ( fp, "\n" );
- marker = slowPath->nextInRead;
+ fprintf ( fp, "\n" );
+ marker = slowPath->nextInRead;
- while ( marker->nextInRead )
- {
- fprintf ( fp, "%u ", marker->edgeid );
- marker = marker->nextInRead;
- }
+ while ( marker->nextInRead )
+ {
+ fprintf ( fp, "%u ", marker->edgeid );
+ marker = marker->nextInRead;
+ }
- fprintf ( fp, "\n" );
+ fprintf ( fp, "\n" );
}
-static void output_pair ( int lengthF, int lengthS, FILE * fp, int nodeF, int nodeS, boolean merged, unsigned int source, unsigned int destination )
+static void output_pair ( int lengthF, int lengthS, FILE *fp, int nodeF, int nodeS, boolean merged, unsigned int source, unsigned int destination )
{
- fprintf ( fp, "$$ %d vs %d $$ %d\n", nodeF, nodeS, merged );
- output_seq ( fastSequence, lengthF, fp, edge_array[source].to_vt, destination );
- output_seq ( slowSequence, lengthS, fp, edge_array[source].to_vt, destination );
+ fprintf ( fp, "$$ %d vs %d $$ %d\n", nodeF, nodeS, merged );
+ output_seq ( fastSequence, lengthF, fp, edge_array[source].to_vt, destination );
+ output_seq ( slowSequence, lengthS, fp, edge_array[source].to_vt, destination );
}
/*************************************************
@@ -188,64 +188,64 @@ Return:
*************************************************/
static void resetNodeStatus ()
{
- unsigned int index;
- ARC * arc;
- unsigned int bal_ed;
-
- for ( index = 1; index <= num_ed; index++ )
- {
- if ( EdSameAsTwin ( index ) )
- {
- edge_array[index].multi = 1;
- continue;
- }
-
- arc = edge_array[index].arcs;
- bal_ed = getTwinEdge ( index );
-
- while ( arc )
- {
- if ( arc->to_ed == bal_ed )
- {
- break;
- }
-
- arc = arc->next;
- }
-
- if ( arc )
- {
- edge_array[index].multi = 1;
- edge_array[bal_ed].multi = 1;
- index++;
- continue;
- }
-
- arc = edge_array[bal_ed].arcs;
-
- while ( arc )
- {
- if ( arc->to_ed == index )
- {
- break;
- }
-
- arc = arc->next;
- }
-
- if ( arc )
- {
- edge_array[index].multi = 1;
- edge_array[bal_ed].multi = 1;
- }
- else
- {
- edge_array[index].multi = 0;
- edge_array[bal_ed].multi = 0;
- }
-
- index++;
- }
+ unsigned int index;
+ ARC *arc;
+ unsigned int bal_ed;
+
+ for ( index = 1; index <= num_ed; index++ )
+ {
+ if ( EdSameAsTwin ( index ) )
+ {
+ edge_array[index].multi = 1;
+ continue;
+ }
+
+ arc = edge_array[index].arcs;
+ bal_ed = getTwinEdge ( index );
+
+ while ( arc )
+ {
+ if ( arc->to_ed == bal_ed )
+ {
+ break;
+ }
+
+ arc = arc->next;
+ }
+
+ if ( arc )
+ {
+ edge_array[index].multi = 1;
+ edge_array[bal_ed].multi = 1;
+ index++;
+ continue;
+ }
+
+ arc = edge_array[bal_ed].arcs;
+
+ while ( arc )
+ {
+ if ( arc->to_ed == index )
+ {
+ break;
+ }
+
+ arc = arc->next;
+ }
+
+ if ( arc )
+ {
+ edge_array[index].multi = 1;
+ edge_array[bal_ed].multi = 1;
+ }
+ else
+ {
+ edge_array[index].multi = 0;
+ edge_array[bal_ed].multi = 0;
+ }
+
+ index++;
+ }
}
/*
@@ -297,33 +297,33 @@ Return:
*************************************************/
static unsigned int nextStartingPoint ()
{
- unsigned int index = 1;
- unsigned int result = 0;
+ unsigned int index = 1;
+ unsigned int result = 0;
- for ( index = progress + 1; index < num_ed; index++ )
- {
- result = index;
+ for ( index = progress + 1; index < num_ed; index++ )
+ {
+ result = index;
- if ( edge_array[index].deleted || edge_array[index].length < 1 )
- {
- continue;
- }
+ if ( edge_array[index].deleted || edge_array[index].length < 1 )
+ {
+ continue;
+ }
- if ( result == 0 )
- {
- return 0;
- }
+ if ( result == 0 )
+ {
+ return 0;
+ }
- if ( edge_array[result].multi > 0 )
- {
- continue;
- }
+ if ( edge_array[result].multi > 0 )
+ {
+ continue;
+ }
- progress = index;
- return result;
- }
+ progress = index;
+ return result;
+ }
- return 0;
+ return 0;
}
/*************************************************
@@ -340,14 +340,14 @@ Return:
*************************************************/
static void updateNodeStatus ()
{
- unsigned int i, node;
-
- for ( i = 0; i < expCounter; i++ )
- {
- node = expanded[i];
- edge_array[node].multi = 1;
- edge_array[getTwinEdge ( node )].multi = 1;
- }
+ unsigned int i, node;
+
+ for ( i = 0; i < expCounter; i++ )
+ {
+ node = expanded[i];
+ edge_array[node].multi = 1;
+ edge_array[getTwinEdge ( node )].multi = 1;
+ }
}
/*************************************************
@@ -364,7 +364,7 @@ Return:
*************************************************/
unsigned int getNodePrevious ( unsigned int node )
{
- return previous[node];
+ return previous[node];
}
/*************************************************
@@ -382,32 +382,32 @@ Return:
*************************************************/
static boolean isPreviousToNode ( unsigned int previous, unsigned int target )
{
- unsigned int currentNode = target;
- unsigned int previousNode = 0;
- Time targetTime = times[target];
-
- while ( currentNode )
- {
- if ( currentNode == previous )
- {
- return 1;
- }
-
- if ( currentNode == previousNode )
- {
- return 0;
- }
-
- if ( times[currentNode] != targetTime )
- {
- return 0;
- }
-
- previousNode = currentNode;
- currentNode = getNodePrevious ( currentNode );
- }
-
- return 0;
+ unsigned int currentNode = target;
+ unsigned int previousNode = 0;
+ Time targetTime = times[target];
+
+ while ( currentNode )
+ {
+ if ( currentNode == previous )
+ {
+ return 1;
+ }
+
+ if ( currentNode == previousNode )
+ {
+ return 0;
+ }
+
+ if ( times[currentNode] != targetTime )
+ {
+ return 0;
+ }
+
+ previousNode = currentNode;
+ currentNode = getNodePrevious ( currentNode );
+ }
+
+ return 0;
}
/*************************************************
@@ -425,17 +425,17 @@ Output:
Return:
None.
*************************************************/
-static void copySeq ( char * targetS, char * sourceS, int pos, int length )
+static void copySeq ( char *targetS, char *sourceS, int pos, int length )
{
- char ch;
- int i, index;
- index = pos;
-
- for ( i = 0; i < length; i++ )
- {
- ch = getCharInTightString ( sourceS, i );
- targetS[index++] = ch;
- }
+ char ch;
+ int i, index;
+ index = pos;
+
+ for ( i = 0; i < length; i++ )
+ {
+ ch = getCharInTightString ( sourceS, i );
+ targetS[index++] = ch;
+ }
}
/*************************************************
@@ -451,54 +451,54 @@ Output:
Return:
The length of sequence.
*************************************************/
-static int extractSequence ( READINTERVAL * path, char * sequence )
+static int extractSequence ( READINTERVAL *path, char *sequence )
{
- READINTERVAL * marker;
- int seqLength, writeIndex;
- seqLength = writeIndex = 0;
- path->start = -10;
- marker = path->nextInRead;
-
- while ( marker->nextInRead )
- {
- marker->start = seqLength;
- seqLength += edge_array[marker->edgeid].length;
- marker = marker->nextInRead;
- }
-
- marker->start = seqLength;
-
- if ( seqLength > MAXREADLENGTH )
- {
- return 0;
- }
-
- marker = path->nextInRead;
-
- while ( marker->nextInRead )
- {
- if ( edge_array[marker->edgeid].length && edge_array[marker->edgeid].seq )
- {
- copySeq ( sequence, edge_array[marker->edgeid].seq, writeIndex, edge_array[marker->edgeid].length );
- writeIndex += edge_array[marker->edgeid].length;
- }
-
- /*
- else if(edge_array[marker->edgeid].length==0)
- printf("node %d with length 0 in this path\n",marker->edgeid);
- else if(edge_array[marker->edgeid].seq==NULL)
- printf("node %d without seq in this path\n",marker->edgeid);
- */
- marker = marker->nextInRead;
- }
-
- return seqLength;
+ READINTERVAL *marker;
+ int seqLength, writeIndex;
+ seqLength = writeIndex = 0;
+ path->start = -10;
+ marker = path->nextInRead;
+
+ while ( marker->nextInRead )
+ {
+ marker->start = seqLength;
+ seqLength += edge_array[marker->edgeid].length;
+ marker = marker->nextInRead;
+ }
+
+ marker->start = seqLength;
+
+ if ( seqLength > MAXREADLENGTH )
+ {
+ return 0;
+ }
+
+ marker = path->nextInRead;
+
+ while ( marker->nextInRead )
+ {
+ if ( edge_array[marker->edgeid].length && edge_array[marker->edgeid].seq )
+ {
+ copySeq ( sequence, edge_array[marker->edgeid].seq, writeIndex, edge_array[marker->edgeid].length );
+ writeIndex += edge_array[marker->edgeid].length;
+ }
+
+ /*
+ else if(edge_array[marker->edgeid].length==0)
+ printf("node %d with length 0 in this path\n",marker->edgeid);
+ else if(edge_array[marker->edgeid].seq==NULL)
+ printf("node %d without seq in this path\n",marker->edgeid);
+ */
+ marker = marker->nextInRead;
+ }
+
+ return seqLength;
}
static int max ( int A, int B, int C )
{
- A = A >= B ? A : B;
- return ( A >= C ? A : C );
+ A = A >= B ? A : B;
+ return ( A >= C ? A : C );
}
/*************************************************
@@ -516,76 +516,76 @@ Output:
Return:
0 if the bubble is not suitable to merge.
*************************************************/
-static boolean compareSequences ( char * sequence1, char * sequence2, int length1, int length2 )
+static boolean compareSequences ( char *sequence1, char *sequence2, int length1, int length2 )
{
- int i, j;
- int maxLength;
- int Choice1, Choice2, Choice3;
- int maxScore;
-
- if ( length1 == 0 || length2 == 0 )
- {
- caseA++;
- return 0;
- }
-
- if ( abs ( ( int ) length1 - ( int ) length2 ) > 2 )
- {
- caseB++;
- return 0;
- }
-
- if ( length1 < overlaplen - 1 || length2 < overlaplen - 1 )
- {
- caseE++;
- return 0;
- }
-
- /*
- if (length1 < overlaplen || length2 < overlaplen){
- if(abs((int)length1 - (int)length2) > 3){
- caseB++;
- return 0;
- }
- }
- */
- for ( i = 0; i <= length1; i++ )
- {
- Fmatrix[i][0] = 0;
- }
-
- for ( j = 0; j <= length2; j++ )
- {
- Fmatrix[0][j] = 0;
- }
-
- for ( i = 1; i <= length1; i++ )
- {
- for ( j = 1; j <= length2; j++ )
- {
- Choice1 = Fmatrix[i - 1][j - 1] + SIM[ ( int ) sequence1[i - 1]][ ( int ) sequence2[j - 1]];
- Choice2 = Fmatrix[i - 1][j] + INDEL;
- Choice3 = Fmatrix[i][j - 1] + INDEL;
- Fmatrix[i][j] = max ( Choice1, Choice2, Choice3 );
- }
- }
-
- maxScore = Fmatrix[length1][length2];
- maxLength = ( length1 > length2 ? length1 : length2 );
-
- if ( maxScore < maxLength - DIFF )
- {
- caseC++;
- return 0;
- }
-
- if ( ( 1 - ( double ) maxScore / maxLength ) > cutoff )
- {
- caseD++;
- return 0;
- }
-
- return 1;
+ int i, j;
+ int maxLength;
+ int Choice1, Choice2, Choice3;
+ int maxScore;
+
+ if ( length1 == 0 || length2 == 0 )
+ {
+ caseA++;
+ return 0;
+ }
+
+ if ( abs ( ( int ) length1 - ( int ) length2 ) > 2 )
+ {
+ caseB++;
+ return 0;
+ }
+
+ if ( length1 < overlaplen - 1 || length2 < overlaplen - 1 )
+ {
+ caseE++;
+ return 0;
+ }
+
+ /*
+ if (length1 < overlaplen || length2 < overlaplen){
+ if(abs((int)length1 - (int)length2) > 3){
+ caseB++;
+ return 0;
+ }
+ }
+ */
+ for ( i = 0; i <= length1; i++ )
+ {
+ Fmatrix[i][0] = 0;
+ }
+
+ for ( j = 0; j <= length2; j++ )
+ {
+ Fmatrix[0][j] = 0;
+ }
+
+ for ( i = 1; i <= length1; i++ )
+ {
+ for ( j = 1; j <= length2; j++ )
+ {
+ Choice1 = Fmatrix[i - 1][j - 1] + SIM[ ( int ) sequence1[i - 1]][ ( int ) sequence2[j - 1]];
+ Choice2 = Fmatrix[i - 1][j] + INDEL;
+ Choice3 = Fmatrix[i][j - 1] + INDEL;
+ Fmatrix[i][j] = max ( Choice1, Choice2, Choice3 );
+ }
+ }
+
+ maxScore = Fmatrix[length1][length2];
+ maxLength = ( length1 > length2 ? length1 : length2 );
+
+ if ( maxScore < maxLength - DIFF )
+ {
+ caseC++;
+ return 0;
+ }
+
+ if ( ( 1 - ( double ) maxScore / maxLength ) > cutoff )
+ {
+ caseD++;
+ return 0;
+ }
+
+ return 1;
}
/*************************************************
@@ -602,70 +602,70 @@ Return:
*************************************************/
static void mapSlowOntoFast ()
{
- int slowIndex = slowSeqLength;
- int fastIndex = fastSeqLength;
- int fastn, slown;
-
- if ( slowIndex == 0 )
- {
- slowToFastMapping[0] = fastIndex;
-
- while ( fastIndex >= 0 )
- {
- fastToSlowMapping[fastIndex--] = 0;
- }
-
- return;
- }
-
- if ( fastIndex == 0 )
- {
- while ( slowIndex >= 0 )
- {
- slowToFastMapping[slowIndex--] = 0;
- }
-
- fastToSlowMapping[0] = slowIndex;
- return;
- }
-
- while ( slowIndex > 0 && fastIndex > 0 )
- {
- fastn = ( int ) fastSequence[fastIndex - 1]; //getCharInTightString(fastSequence,fastIndex-1);
- slown = ( int ) slowSequence[slowIndex - 1]; //getCharInTightString(slowSequence,slowIndex-1);
-
- if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex - 1] + SIM[fastn][slown] )
- {
- fastToSlowMapping[--fastIndex] = --slowIndex;
- slowToFastMapping[slowIndex] = fastIndex;
- }
- else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex] + INDEL )
- {
- fastToSlowMapping[--fastIndex] = slowIndex - 1;
- }
- else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex][slowIndex - 1] + INDEL )
- {
- slowToFastMapping[--slowIndex] = fastIndex - 1;
- }
- else
- {
- fprintf ( stderr, "Error in the step: map the slow path to the fast path.\n" );
- abort ();
- }
- }
-
- while ( slowIndex > 0 )
- {
- slowToFastMapping[--slowIndex] = -1;
- }
-
- while ( fastIndex > 0 )
- {
- fastToSlowMapping[--fastIndex] = -1;
- }
-
- slowToFastMapping[slowSeqLength] = fastSeqLength;
- fastToSlowMapping[fastSeqLength] = slowSeqLength;
+ int slowIndex = slowSeqLength;
+ int fastIndex = fastSeqLength;
+ int fastn, slown;
+
+ if ( slowIndex == 0 )
+ {
+ slowToFastMapping[0] = fastIndex;
+
+ while ( fastIndex >= 0 )
+ {
+ fastToSlowMapping[fastIndex--] = 0;
+ }
+
+ return;
+ }
+
+ if ( fastIndex == 0 )
+ {
+ while ( slowIndex >= 0 )
+ {
+ slowToFastMapping[slowIndex--] = 0;
+ }
+
+ fastToSlowMapping[0] = slowIndex;
+ return;
+ }
+
+ while ( slowIndex > 0 && fastIndex > 0 )
+ {
+ fastn = ( int ) fastSequence[fastIndex - 1]; //getCharInTightString(fastSequence,fastIndex-1);
+ slown = ( int ) slowSequence[slowIndex - 1]; //getCharInTightString(slowSequence,slowIndex-1);
+
+ if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex - 1] + SIM[fastn][slown] )
+ {
+ fastToSlowMapping[--fastIndex] = --slowIndex;
+ slowToFastMapping[slowIndex] = fastIndex;
+ }
+ else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex] + INDEL )
+ {
+ fastToSlowMapping[--fastIndex] = slowIndex - 1;
+ }
+ else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex][slowIndex - 1] + INDEL )
+ {
+ slowToFastMapping[--slowIndex] = fastIndex - 1;
+ }
+ else
+ {
+ fprintf ( stderr, "Error in the step: map the slow path to the fast path.\n" );
+ abort ();
+ }
+ }
+
+ while ( slowIndex > 0 )
+ {
+ slowToFastMapping[--slowIndex] = -1;
+ }
+
+ while ( fastIndex > 0 )
+ {
+ fastToSlowMapping[--fastIndex] = -1;
+ }
+
+ slowToFastMapping[slowSeqLength] = fastSeqLength;
+ fastToSlowMapping[fastSeqLength] = slowSeqLength;
}
/*************************************************
@@ -681,29 +681,29 @@ Output:
Return:
The new linked of the arc.
*************************************************/
-ARC * deleteArc ( ARC * arc_list, ARC * arc )
+ARC *deleteArc ( ARC *arc_list, ARC *arc )
{
- if ( arc->prev )
- {
- arc->prev->next = arc->next;
- }
- else
- {
- arc_list = arc->next;
- }
-
- if ( arc->next )
- {
- arc->next->prev = arc->prev;
- }
-
- /*
- if(checkActiveArc&&arc==activeArc){
- activeArc = arc->next;
- }
- */
- dismissArc ( arc );
- return arc_list;
+ if ( arc->prev )
+ {
+ arc->prev->next = arc->next;
+ }
+ else
+ {
+ arc_list = arc->next;
+ }
+
+ if ( arc->next )
+ {
+ arc->next->prev = arc->prev;
+ }
+
+ /*
+ if(checkActiveArc&&arc==activeArc){
+ activeArc = arc->next;
+ }
+ */
+ dismissArc ( arc );
+ return arc_list;
}
/*************************************************
@@ -719,18 +719,18 @@ Output:
Return:
The new linked of the path.
*************************************************/
-static READINTERVAL * addRv ( READINTERVAL * rv_list, READINTERVAL * rv )
+static READINTERVAL *addRv ( READINTERVAL *rv_list, READINTERVAL *rv )
{
- rv->prevOnEdge = NULL;
- rv->nextOnEdge = rv_list;
+ rv->prevOnEdge = NULL; // rv is inserted at the head, so it has no predecessor
+ rv->nextOnEdge = rv_list; // the old head (may be NULL) follows rv
- if ( rv_list )
- {
- rv_list->prevOnEdge = rv;
- }
+ if ( rv_list )
+ {
+ rv_list->prevOnEdge = rv; // back-link the old head to rv
+ }
- rv_list = rv;
- return rv_list;
+ rv_list = rv;
+ return rv_list; // rv is the new head of the list
}
/*************************************************
@@ -746,23 +746,23 @@ Output:
Return:
The new linked of the path.
*************************************************/
-static READINTERVAL * deleteRv ( READINTERVAL * rv_list, READINTERVAL * rv )
+static READINTERVAL *deleteRv ( READINTERVAL *rv_list, READINTERVAL *rv )
{
- if ( rv->prevOnEdge )
- {
- rv->prevOnEdge->nextOnEdge = rv->nextOnEdge;
- }
- else
- {
- rv_list = rv->nextOnEdge;
- }
-
- if ( rv->nextOnEdge )
- {
- rv->nextOnEdge->prevOnEdge = rv->prevOnEdge;
- }
-
- return rv_list;
+ if ( rv->prevOnEdge ) // rv is not the head of rv_list
+ {
+ rv->prevOnEdge->nextOnEdge = rv->nextOnEdge; // bypass rv in the forward direction
+ }
+ else
+ {
+ rv_list = rv->nextOnEdge; // rv was the head: its successor (possibly NULL) becomes the new head
+ }
+
+ if ( rv->nextOnEdge )
+ {
+ rv->nextOnEdge->prevOnEdge = rv->prevOnEdge; // bypass rv in the backward direction
+ }
+
+ return rv_list; // rv is only unlinked here; it is not released and its own link fields are left unchanged
}
/*
@@ -798,30 +798,30 @@ Return:
*************************************************/
static int mapDistancesOntoPaths ()
{
- READINTERVAL * marker;
- int totalDistance = 0;
- marker = slowPath;
-
- while ( marker->nextInRead )
- {
- marker = marker->nextInRead;
- marker->start = totalDistance;
- totalDistance += edge_array[marker->edgeid].length;
- marker->bal_rv->start = totalDistance;
- }
-
- totalDistance = 0;
- marker = fastPath;
-
- while ( marker->nextInRead )
- {
- marker = marker->nextInRead;
- marker->start = totalDistance;
- totalDistance += edge_array[marker->edgeid].length;
- marker->bal_rv->start = totalDistance;
- }
-
- return totalDistance;
+ READINTERVAL *marker;
+ int totalDistance = 0; // running sum of edge lengths along the path being walked
+ marker = slowPath;
+
+ while ( marker->nextInRead ) // the first interval of slowPath is stepped over: only its successors get a start value
+ {
+ marker = marker->nextInRead;
+ marker->start = totalDistance; // offset at which this interval begins on the path
+ totalDistance += edge_array[marker->edgeid].length;
+ marker->bal_rv->start = totalDistance; // the twin interval stores the offset at which this interval ends
+ }
+
+ totalDistance = 0; // restart the accumulation for fastPath
+ marker = fastPath;
+
+ while ( marker->nextInRead ) // same walk as above, applied to fastPath
+ {
+ marker = marker->nextInRead;
+ marker->start = totalDistance;
+ totalDistance += edge_array[marker->edgeid].length;
+ marker->bal_rv->start = totalDistance;
+ }
+
+ return totalDistance; // accumulated length of fastPath only; the slowPath total was overwritten above
}
@@ -837,45 +837,45 @@ Output:
Return:
None.
*************************************************/
-static void attachPath ( READINTERVAL * path )
+static void attachPath ( READINTERVAL *path )
{
- READINTERVAL * marker, *bal_marker;
- unsigned int ed, bal_ed;
- marker = path;
-
- while ( marker )
- {
- ed = marker->edgeid;
- edge_array[ed].rv = addRv ( edge_array[ed].rv, marker );
- bal_ed = getTwinEdge ( ed );
- bal_marker = allocateRV ( -marker->readid, bal_ed );
- edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_marker );
-
- if ( marker->prevInRead )
- {
- marker->prevInRead->bal_rv->prevInRead = bal_marker;
- bal_marker->nextInRead = marker->prevInRead->bal_rv;
- }
-
- bal_marker->bal_rv = marker;
- marker->bal_rv = bal_marker;
- marker = marker->nextInRead;
- }
+ READINTERVAL *marker, *bal_marker;
+ unsigned int ed, bal_ed;
+ marker = path;
+
+ while ( marker ) // for every interval on the path: register it on its edge and create a twin interval on the twin edge
+ {
+ ed = marker->edgeid;
+ edge_array[ed].rv = addRv ( edge_array[ed].rv, marker ); // push marker onto the head of its edge's interval list
+ bal_ed = getTwinEdge ( ed );
+ bal_marker = allocateRV ( -marker->readid, bal_ed ); // twin interval: negated read id, placed on the twin edge
+ edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_marker );
+
+ if ( marker->prevInRead ) // twins are chained in the opposite direction to the path
+ {
+ marker->prevInRead->bal_rv->prevInRead = bal_marker;
+ bal_marker->nextInRead = marker->prevInRead->bal_rv;
+ }
+
+ bal_marker->bal_rv = marker; // cross-link the interval and its twin
+ marker->bal_rv = bal_marker;
+ marker = marker->nextInRead;
+ }
}
-static void detachPathSingle ( READINTERVAL * path )
+static void detachPathSingle ( READINTERVAL *path )
{
- READINTERVAL * marker, *nextMarker;
- unsigned int ed;
- marker = path;
-
- while ( marker )
- {
- nextMarker = marker->nextInRead;
- ed = marker->edgeid;
- edge_array[ed].rv = deleteRv ( edge_array[ed].rv, marker );
- dismissRV ( marker );
- marker = nextMarker;
- }
+ READINTERVAL *marker, *nextMarker;
+ unsigned int ed;
+ marker = path;
+
+ while ( marker ) // only the intervals on the path itself are handled; bal_rv twins are not touched in this function
+ {
+ nextMarker = marker->nextInRead; // saved before marker is handed to dismissRV
+ ed = marker->edgeid;
+ edge_array[ed].rv = deleteRv ( edge_array[ed].rv, marker ); // unlink from the edge's interval list
+ dismissRV ( marker ); // defined elsewhere -- presumably releases or recycles the interval, confirm
+ marker = nextMarker;
+ }
}
/*************************************************
@@ -890,269 +890,269 @@ Output:
Return:
None.
*************************************************/
-static void detachPath ( READINTERVAL * path )
+static void detachPath ( READINTERVAL *path )
{
- READINTERVAL * marker, *bal_marker, *nextMarker;
- unsigned int ed, bal_ed;
- marker = path;
-
- while ( marker )
- {
- nextMarker = marker->nextInRead;
- bal_marker = marker->bal_rv;
- ed = marker->edgeid;
- edge_array[ed].rv = deleteRv ( edge_array[ed].rv, marker );
- dismissRV ( marker );
- bal_ed = getTwinEdge ( ed );
- edge_array[bal_ed].rv = deleteRv ( edge_array[bal_ed].rv, bal_marker );
- dismissRV ( bal_marker );
- marker = nextMarker;
- }
+ READINTERVAL *marker, *bal_marker, *nextMarker;
+ unsigned int ed, bal_ed;
+ marker = path;
+
+ while ( marker ) // remove every interval on the path together with its twin
+ {
+ nextMarker = marker->nextInRead; // both links are read before marker is handed to dismissRV
+ bal_marker = marker->bal_rv;
+ ed = marker->edgeid;
+ edge_array[ed].rv = deleteRv ( edge_array[ed].rv, marker ); // unlink the interval from its edge
+ dismissRV ( marker );
+ bal_ed = getTwinEdge ( ed );
+ edge_array[bal_ed].rv = deleteRv ( edge_array[bal_ed].rv, bal_marker ); // unlink the twin from the twin edge
+ dismissRV ( bal_marker );
+ marker = nextMarker;
+ }
}
static void remapNodeMarkersOntoNeighbour ( unsigned int source, unsigned int target )
{
- READINTERVAL * marker, *bal_marker;
- unsigned int bal_source = getTwinEdge ( source );
- unsigned int bal_target = getTwinEdge ( target );
-
- while ( ( marker = edge_array[source].rv ) != NULL )
- {
- edge_array[source].rv = deleteRv ( edge_array[source].rv, marker );
- marker->edgeid = target;
- edge_array[target].rv = addRv ( edge_array[target].rv, marker );
- bal_marker = marker->bal_rv;
- edge_array[bal_source].rv = deleteRv ( edge_array[bal_source].rv, bal_marker );
- bal_marker->edgeid = bal_target;
- edge_array[bal_target].rv = addRv ( edge_array[bal_target].rv, bal_marker );
- }
+ READINTERVAL *marker, *bal_marker;
+ unsigned int bal_source = getTwinEdge ( source );
+ unsigned int bal_target = getTwinEdge ( target );
+
+ while ( ( marker = edge_array[source].rv ) != NULL ) // keep taking the head of source's interval list until the list is empty
+ {
+ edge_array[source].rv = deleteRv ( edge_array[source].rv, marker );
+ marker->edgeid = target; // re-home the interval on target
+ edge_array[target].rv = addRv ( edge_array[target].rv, marker );
+ bal_marker = marker->bal_rv;
+ edge_array[bal_source].rv = deleteRv ( edge_array[bal_source].rv, bal_marker ); // move the twin interval between the twin edges in the same way
+ bal_marker->edgeid = bal_target;
+ edge_array[bal_target].rv = addRv ( edge_array[bal_target].rv, bal_marker );
+ }
}
static void remapNodeInwardReferencesOntoNode ( unsigned int source, unsigned int target )
{
- ARC * arc;
- unsigned int destination;
-
- for ( arc = edge_array[source].arcs; arc != NULL; arc = arc->next )
- {
- destination = arc->to_ed;
-
- if ( destination == target || destination == source )
- {
- continue;
- }
-
- if ( previous[destination] == source )
- {
- previous[destination] = target;
- }
- }
+ ARC *arc;
+ unsigned int destination;
+
+ for ( arc = edge_array[source].arcs; arc != NULL; arc = arc->next ) // walk every arc leaving source
+ {
+ destination = arc->to_ed;
+
+ if ( destination == target || destination == source ) // entries for target itself and for a self-loop are left alone
+ {
+ continue;
+ }
+
+ if ( previous[destination] == source ) // destination recorded source as its predecessor: redirect it to target
+ {
+ previous[destination] = target;
+ }
+ }
}
static void remapNodeTimesOntoTargetNode ( unsigned int source, unsigned int target )
{
- Time nodeTime = times[source];
- unsigned int prevNode = previous[source];
- Time targetTime = times[target];
-
- if ( nodeTime == -1 )
- {
- return;
- }
-
- if ( prevNode == source )
- {
- times[target] = nodeTime;
- previous[target] = target;
- }
- else if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, prevNode ) ) )
- {
- times[target] = nodeTime;
-
- if ( prevNode != getTwinEdge ( source ) )
- {
- previous[target] = prevNode;
- }
- else
- {
- previous[target] = getTwinEdge ( target );
- }
- }
-
- remapNodeInwardReferencesOntoNode ( source, target );
- previous[source] = 0;
+ Time nodeTime = times[source];
+ unsigned int prevNode = previous[source];
+ Time targetTime = times[target];
+
+ if ( nodeTime == -1 ) // -1 appears to mark "no time recorded" (confirm); nothing to transfer in that case
+ {
+ return;
+ }
+
+ if ( prevNode == source ) // source is its own predecessor: target takes source's time and becomes its own predecessor
+ {
+ times[target] = nodeTime;
+ previous[target] = target;
+ }
+ else if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, prevNode ) ) ) // target has no time, a larger time, or an equal time for which isPreviousToNode ( target, prevNode ) is false
+ {
+ times[target] = nodeTime;
+
+ if ( prevNode != getTwinEdge ( source ) )
+ {
+ previous[target] = prevNode;
+ }
+ else
+ {
+ previous[target] = getTwinEdge ( target ); // predecessor was source's twin, so target's twin is used instead
+ }
+ }
+
+ remapNodeInwardReferencesOntoNode ( source, target ); // successors that recorded source as predecessor now record target
+ previous[source] = 0; // overwrite source's predecessor entry with 0
}
static void remapNodeTimesOntoNeighbour ( unsigned int source, unsigned int target )
{
- remapNodeTimesOntoTargetNode ( source, target );
- remapNodeTimesOntoTargetNode ( getTwinEdge ( source ), getTwinEdge ( target ) ); //questionable
+ remapNodeTimesOntoTargetNode ( source, target ); // transfer time/predecessor bookkeeping for the edge itself
+ remapNodeTimesOntoTargetNode ( getTwinEdge ( source ), getTwinEdge ( target ) ); //questionable (original author's remark) -- the same transfer applied to the twin edges
}
-static void destroyArc ( unsigned int from_ed, ARC * arc )
+static void destroyArc ( unsigned int from_ed, ARC *arc )
{
- unsigned int bal_dest;
- ARC * twinArc;
-
- if ( !arc )
- {
- return;
- }
-
- bal_dest = getTwinEdge ( arc->to_ed );
- twinArc = arc->bal_arc;
- removeArcInLookupTable ( from_ed, arc->to_ed );
- edge_array[from_ed].arcs = deleteArc ( edge_array[from_ed].arcs, arc );
-
- if ( bal_dest != from_ed )
- {
- removeArcInLookupTable ( bal_dest, getTwinEdge ( from_ed ) );
- edge_array[bal_dest].arcs = deleteArc ( edge_array[bal_dest].arcs, twinArc );
- }
+ unsigned int bal_dest;
+ ARC *twinArc;
+
+ if ( !arc ) // nothing to destroy
+ {
+ return;
+ }
+
+ bal_dest = getTwinEdge ( arc->to_ed ); // the twin arc is stored on the twin of arc's destination
+ twinArc = arc->bal_arc; // both fields are read before deleteArc hands arc to dismissArc
+ removeArcInLookupTable ( from_ed, arc->to_ed );
+ edge_array[from_ed].arcs = deleteArc ( edge_array[from_ed].arcs, arc );
+
+ if ( bal_dest != from_ed ) // when bal_dest == from_ed no second removal is done; createAnalogousArc makes such an arc its own bal_arc
+ {
+ removeArcInLookupTable ( bal_dest, getTwinEdge ( from_ed ) );
+ edge_array[bal_dest].arcs = deleteArc ( edge_array[bal_dest].arcs, twinArc );
+ }
}
-static void createAnalogousArc ( unsigned int originNode, unsigned int destinationNode, ARC * refArc )
+static void createAnalogousArc ( unsigned int originNode, unsigned int destinationNode, ARC *refArc )
{
- ARC * arc, *twinArc;
- unsigned int destinationTwin;
- arc = getArcBetween ( originNode, destinationNode );
-
- if ( arc )
- {
- if ( refArc->bal_arc != refArc )
- {
- arc->multiplicity += refArc->multiplicity;
- arc->bal_arc->multiplicity += refArc->multiplicity;
- }
- else
- {
- arc->multiplicity += refArc->multiplicity / 2;
- arc->bal_arc->multiplicity += refArc->multiplicity / 2;
- }
-
- return;
- }
-
- arc = allocateArc ( destinationNode );
- arc->multiplicity = refArc->multiplicity;
- arc->prev = NULL;
- arc->next = edge_array[originNode].arcs;
-
- if ( edge_array[originNode].arcs )
- {
- edge_array[originNode].arcs->prev = arc;
- }
-
- edge_array[originNode].arcs = arc;
- putArc2LookupTable ( originNode, arc );
- destinationTwin = getTwinEdge ( destinationNode );
-
- if ( destinationTwin == originNode )
- {
- arc->bal_arc = arc;
-
- if ( refArc->bal_arc != refArc )
- {
- arc->multiplicity += refArc->multiplicity;
- }
-
- return;
- }
-
- twinArc = allocateArc ( getTwinEdge ( originNode ) );
- arc->bal_arc = twinArc;
- twinArc->bal_arc = arc;
- twinArc->multiplicity = refArc->multiplicity;
- twinArc->prev = NULL;
- twinArc->next = edge_array[destinationTwin].arcs;
-
- if ( edge_array[destinationTwin].arcs )
- {
- edge_array[destinationTwin].arcs->prev = twinArc;
- }
-
- edge_array[destinationTwin].arcs = twinArc;
- putArc2LookupTable ( destinationTwin, twinArc );
+ ARC *arc, *twinArc;
+ unsigned int destinationTwin;
+ arc = getArcBetween ( originNode, destinationNode ); // look for an existing arc originNode -> destinationNode
+
+ if ( arc ) // already present: fold refArc's multiplicity into it instead of creating a second arc
+ {
+ if ( refArc->bal_arc != refArc )
+ {
+ arc->multiplicity += refArc->multiplicity;
+ arc->bal_arc->multiplicity += refArc->multiplicity;
+ }
+ else // refArc is its own twin: half of its multiplicity is added on each side
+ {
+ arc->multiplicity += refArc->multiplicity / 2;
+ arc->bal_arc->multiplicity += refArc->multiplicity / 2;
+ }
+
+ return;
+ }
+
+ arc = allocateArc ( destinationNode ); // no existing arc: build one and push it onto the head of originNode's arc list
+ arc->multiplicity = refArc->multiplicity;
+ arc->prev = NULL;
+ arc->next = edge_array[originNode].arcs;
+
+ if ( edge_array[originNode].arcs )
+ {
+ edge_array[originNode].arcs->prev = arc; // back-link the old head
+ }
+
+ edge_array[originNode].arcs = arc;
+ putArc2LookupTable ( originNode, arc );
+ destinationTwin = getTwinEdge ( destinationNode );
+
+ if ( destinationTwin == originNode ) // the twin arc would also start at originNode: the new arc serves as its own twin
+ {
+ arc->bal_arc = arc;
+
+ if ( refArc->bal_arc != refArc ) // refArc had a separate twin, so its multiplicity is added a second time
+ {
+ arc->multiplicity += refArc->multiplicity;
+ }
+
+ return;
+ }
+
+ twinArc = allocateArc ( getTwinEdge ( originNode ) ); // twin arc: destinationTwin -> twin of originNode
+ arc->bal_arc = twinArc; // cross-link the arc and its twin
+ twinArc->bal_arc = arc;
+ twinArc->multiplicity = refArc->multiplicity;
+ twinArc->prev = NULL;
+ twinArc->next = edge_array[destinationTwin].arcs; // push the twin onto the head of destinationTwin's arc list
+
+ if ( edge_array[destinationTwin].arcs )
+ {
+ edge_array[destinationTwin].arcs->prev = twinArc;
+ }
+
+ edge_array[destinationTwin].arcs = twinArc;
+ putArc2LookupTable ( destinationTwin, twinArc );
}
static void remapNodeArcsOntoTarget ( unsigned int source, unsigned int target )
{
- ARC * arc;
-
- if ( source == activeNode )
- {
- activeNode = target;
- }
-
- arc = edge_array[source].arcs;
-
- if ( !arc )
- {
- return;
- }
-
- while ( arc != NULL )
- {
- createAnalogousArc ( target, arc->to_ed, arc );
- destroyArc ( source, arc );
- arc = edge_array[source].arcs;
- }
+ ARC *arc;
+
+ if ( source == activeNode ) // keep the file-level activeNode pointing at the surviving edge
+ {
+ activeNode = target;
+ }
+
+ arc = edge_array[source].arcs;
+
+ if ( !arc ) // source has no outgoing arcs: nothing to move
+ {
+ return;
+ }
+
+ while ( arc != NULL ) // each pass moves the current head arc, so the loop ends once source's arc list is empty
+ {
+ createAnalogousArc ( target, arc->to_ed, arc ); // add (or merge into) the arc target -> arc->to_ed
+ destroyArc ( source, arc ); // remove the original arc and, where applicable, its twin
+ arc = edge_array[source].arcs; // re-read the head, which destroyArc has just updated
+ }
}
static void remapNodeArcsOntoNeighbour ( unsigned int source, unsigned int target )
{
- remapNodeArcsOntoTarget ( source, target );
- remapNodeArcsOntoTarget ( getTwinEdge ( source ), getTwinEdge ( target ) );
+ remapNodeArcsOntoTarget ( source, target ); // move the arcs leaving source onto target
+ remapNodeArcsOntoTarget ( getTwinEdge ( source ), getTwinEdge ( target ) ); // then the arcs leaving source's twin onto target's twin
}
-static DFibHeapNode * getNodeDHeapNode ( unsigned int node )
+static DFibHeapNode *getNodeDHeapNode ( unsigned int node )
{
- return dheapNodes[node];
+ return dheapNodes[node]; // heap node recorded for this edge id in the dheapNodes table; callers in this file compare it against NULL
}
-static void setNodeDHeapNode ( unsigned int node, DFibHeapNode * dheapNode )
+static void setNodeDHeapNode ( unsigned int node, DFibHeapNode *dheapNode )
{
- dheapNodes[node] = dheapNode;
+ dheapNodes[node] = dheapNode; // record (or clear, when dheapNode is NULL) the heap node associated with this edge id
}
static void remapNodeFibHeapReferencesOntoNode ( unsigned int source, unsigned int target )
{
- DFibHeapNode * sourceDHeapNode = getNodeDHeapNode ( source );
- DFibHeapNode * targetDHeapNode = getNodeDHeapNode ( target );
-
- if ( sourceDHeapNode == NULL )
- {
- return;
- }
-
- if ( targetDHeapNode == NULL )
- {
- setNodeDHeapNode ( target, sourceDHeapNode );
- replaceValueInDHeap ( sourceDHeapNode, target );
- }
- else if ( getKey ( targetDHeapNode ) > getKey ( sourceDHeapNode ) )
- {
- setNodeDHeapNode ( target, sourceDHeapNode );
- replaceValueInDHeap ( sourceDHeapNode, target );
- destroyNodeInDHeap ( targetDHeapNode, dheap );
- }
- else
- {
- destroyNodeInDHeap ( sourceDHeapNode, dheap );
- }
-
- setNodeDHeapNode ( source, NULL );
+ DFibHeapNode *sourceDHeapNode = getNodeDHeapNode ( source );
+ DFibHeapNode *targetDHeapNode = getNodeDHeapNode ( target );
+
+ if ( sourceDHeapNode == NULL ) // source has no heap entry: nothing to transfer
+ {
+ return;
+ }
+
+ if ( targetDHeapNode == NULL ) // target has no heap entry: it adopts source's entry
+ {
+ setNodeDHeapNode ( target, sourceDHeapNode );
+ replaceValueInDHeap ( sourceDHeapNode, target ); // the adopted heap node now carries target as its value
+ }
+ else if ( getKey ( targetDHeapNode ) > getKey ( sourceDHeapNode ) ) // both have entries and source's key is smaller: keep source's entry, drop target's
+ {
+ setNodeDHeapNode ( target, sourceDHeapNode );
+ replaceValueInDHeap ( sourceDHeapNode, target );
+ destroyNodeInDHeap ( targetDHeapNode, dheap );
+ }
+ else // target's key is smaller or equal: keep target's entry, drop source's
+ {
+ destroyNodeInDHeap ( sourceDHeapNode, dheap );
+ }
+
+ setNodeDHeapNode ( source, NULL ); // source no longer owns a heap entry in any of the three cases
}
static void combineCOV ( unsigned int source, int len_s, unsigned int target, int len_t )
{
- if ( len_s < 1 || len_t < 1 )
- {
- return;
- }
-
- int cov = ( len_s * edge_array[source].cvg + len_t * edge_array[target].cvg ) / len_t;
- edge_array[target].cvg = cov > MaxEdgeCov ? MaxEdgeCov : cov;
- edge_array[getTwinEdge ( target )].cvg = cov > MaxEdgeCov ? MaxEdgeCov : cov;
+ if ( len_s < 1 || len_t < 1 ) // skip when either length is non-positive; this also guards the division by len_t below
+ {
+ return;
+ }
+
+ int cov = ( len_s * edge_array[source].cvg + len_t * edge_array[target].cvg ) / len_t; // NOTE(review): length-weighted sum divided by len_t alone, not by len_s + len_t -- confirm this is intended
+ edge_array[target].cvg = cov > MaxEdgeCov ? MaxEdgeCov : cov; // clamp to MaxEdgeCov
+ edge_array[getTwinEdge ( target )].cvg = cov > MaxEdgeCov ? MaxEdgeCov : cov; // target's twin receives the same clamped value
}
/*************************************************
@@ -1170,370 +1170,370 @@ Return:
*************************************************/
static void remapNodeOntoNeighbour ( unsigned int source, unsigned int target )
{
- combineCOV ( source, edge_array[source].length, target, edge_array[target].length );
- remapNodeMarkersOntoNeighbour ( source, target );
- remapNodeTimesOntoNeighbour ( source, target ); //questionable
- remapNodeArcsOntoNeighbour ( source, target );
- remapNodeFibHeapReferencesOntoNode ( source, target );
- remapNodeFibHeapReferencesOntoNode ( getTwinEdge ( source ), getTwinEdge ( target ) );
- edge_array[source].deleted = 1;
- edge_array[getTwinEdge ( source )].deleted = 1;
-
- if ( startingNode == source )
- {
- startingNode = target;
- }
-
- if ( startingNode == getTwinEdge ( source ) )
- {
- startingNode = getTwinEdge ( target );
- }
-
- edge_array[source].length = 0;
- edge_array[getTwinEdge ( source )].length = 0;
+ combineCOV ( source, edge_array[source].length, target, edge_array[target].length ); // fold source's coverage into target; uses the lengths before source's is zeroed below
+ remapNodeMarkersOntoNeighbour ( source, target ); // move read intervals (and their twins) from source to target
+ remapNodeTimesOntoNeighbour ( source, target ); //questionable (original author's remark) -- transfers times[]/previous[] bookkeeping
+ remapNodeArcsOntoNeighbour ( source, target ); // move outgoing arcs of source and of its twin
+ remapNodeFibHeapReferencesOntoNode ( source, target ); // merge heap entries for the edge ...
+ remapNodeFibHeapReferencesOntoNode ( getTwinEdge ( source ), getTwinEdge ( target ) ); // ... and for the twin edge
+ edge_array[source].deleted = 1; // flag source and its twin as deleted
+ edge_array[getTwinEdge ( source )].deleted = 1;
+
+ if ( startingNode == source ) // keep the file-level startingNode on a surviving edge
+ {
+ startingNode = target;
+ }
+
+ if ( startingNode == getTwinEdge ( source ) )
+ {
+ startingNode = getTwinEdge ( target );
+ }
+
+ edge_array[source].length = 0; // the removed edge and its twin end with length 0
+ edge_array[getTwinEdge ( source )].length = 0;
}
-static void connectInRead ( READINTERVAL * previous, READINTERVAL * next )
+static void connectInRead ( READINTERVAL *previous, READINTERVAL *next )
{
- if ( previous )
- {
- previous->nextInRead = next;
-
- if ( next )
- {
- previous->bal_rv->prevInRead = next->bal_rv;
- }
- else
- {
- previous->bal_rv->prevInRead = NULL;
- }
- }
-
- if ( next )
- {
- next->prevInRead = previous;
-
- if ( previous )
- {
- next->bal_rv->nextInRead = previous->bal_rv;
- }
- else
- {
- next->bal_rv->nextInRead = NULL;
- }
- }
+ if ( previous ) // the parameter named previous hides the file-level previous[] array inside this function
+ {
+ previous->nextInRead = next; // forward link; next may be NULL to terminate the chain
+
+ if ( next )
+ {
+ previous->bal_rv->prevInRead = next->bal_rv; // twins are linked in the opposite direction; bal_rv is dereferenced without a NULL check
+ }
+ else
+ {
+ previous->bal_rv->prevInRead = NULL;
+ }
+ }
+
+ if ( next )
+ {
+ next->prevInRead = previous; // backward link; previous may be NULL to mark the start of the chain
+
+ if ( previous )
+ {
+ next->bal_rv->nextInRead = previous->bal_rv; // mirror of the twin link set above
+ }
+ else
+ {
+ next->bal_rv->nextInRead = NULL;
+ }
+ }
}
-static int remapBackOfNodeMarkersOntoNeighbour ( unsigned int source, READINTERVAL * sourceMarker, unsigned int target, READINTERVAL * targetMarker, boolean slowToFast )
+static int remapBackOfNodeMarkersOntoNeighbour ( unsigned int source, READINTERVAL *sourceMarker, unsigned int target, READINTERVAL *targetMarker, boolean slowToFast )
{
- READINTERVAL * marker, *newMarker, *bal_new, *previousMarker;
- int halfwayPoint, halfwayPointOffset, breakpoint;
- int * targetToSourceMapping, *sourceToTargetMapping;
- unsigned int bal_ed;
- int targetFinish = targetMarker->bal_rv->start;
- int sourceStart = sourceMarker->start;
- int sourceFinish = sourceMarker->bal_rv->start;
- int alignedSourceLength = sourceFinish - sourceStart;
- int realSourceLength = edge_array[source].length;
-
- if ( slowToFast )
- {
- sourceToTargetMapping = slowToFastMapping;
- targetToSourceMapping = fastToSlowMapping;
- }
- else
- {
- sourceToTargetMapping = fastToSlowMapping;
- targetToSourceMapping = slowToFastMapping;
- }
-
- if ( alignedSourceLength > 0 && targetFinish > 0 )
- {
- halfwayPoint = targetToSourceMapping[targetFinish - 1] - sourceStart + 1;
- halfwayPoint *= realSourceLength;
- halfwayPoint /= alignedSourceLength;
- }
- else
- {
- halfwayPoint = 0;
- }
-
- if ( halfwayPoint < 0 )
- {
- halfwayPoint = 0;
- }
-
- if ( halfwayPoint > realSourceLength )
- {
- halfwayPoint = realSourceLength;
- }
-
- halfwayPointOffset = realSourceLength - halfwayPoint;
- bal_ed = getTwinEdge ( target );
-
- for ( marker = edge_array[source].rv; marker != NULL; marker = marker->nextOnEdge )
- {
- if ( marker->prevInRead && marker->prevInRead->edgeid == target )
- {
- continue;
- }
-
- newMarker = allocateRV ( marker->readid, target );
- edge_array[target].rv = addRv ( edge_array[target].rv, newMarker );
- bal_new = allocateRV ( -marker->readid, bal_ed );
- edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_new );
- newMarker->bal_rv = bal_new;
- bal_new->bal_rv = newMarker;
- newMarker->start = marker->start;
-
- if ( realSourceLength > 0 )
- {
- breakpoint = halfwayPoint + marker->start;
- }
- else
- {
- breakpoint = marker->start;
- }
-
- bal_new->start = breakpoint;
- marker->start = breakpoint;
- previousMarker = marker->prevInRead;
- connectInRead ( previousMarker, newMarker );
- connectInRead ( newMarker, marker );
- }
-
- return halfwayPointOffset;
+ READINTERVAL *marker, *newMarker, *bal_new, *previousMarker;
+ int halfwayPoint, halfwayPointOffset, breakpoint;
+ int *targetToSourceMapping, *sourceToTargetMapping;
+ unsigned int bal_ed;
+ int targetFinish = targetMarker->bal_rv->start; // the twin's start holds the end offset of an interval (see mapDistancesOntoPaths)
+ int sourceStart = sourceMarker->start;
+ int sourceFinish = sourceMarker->bal_rv->start;
+ int alignedSourceLength = sourceFinish - sourceStart; // length of source as measured on its path
+ int realSourceLength = edge_array[source].length; // length currently stored for the source edge
+
+ if ( slowToFast ) // choose which of the two file-level mapping tables plays which role
+ {
+ sourceToTargetMapping = slowToFastMapping;
+ targetToSourceMapping = fastToSlowMapping;
+ }
+ else
+ {
+ sourceToTargetMapping = fastToSlowMapping; // note: sourceToTargetMapping is assigned in both branches but never read in this function
+ targetToSourceMapping = slowToFastMapping;
+ }
+
+ if ( alignedSourceLength > 0 && targetFinish > 0 ) // both guards keep the index and the division below valid
+ {
+ halfwayPoint = targetToSourceMapping[targetFinish - 1] - sourceStart + 1; // source-side position mapped from the last position of target, relative to sourceStart
+ halfwayPoint *= realSourceLength; // rescale from path-measured length to the stored edge length
+ halfwayPoint /= alignedSourceLength;
+ }
+ else
+ {
+ halfwayPoint = 0;
+ }
+
+ if ( halfwayPoint < 0 ) // clamp into [0, realSourceLength]
+ {
+ halfwayPoint = 0;
+ }
+
+ if ( halfwayPoint > realSourceLength )
+ {
+ halfwayPoint = realSourceLength;
+ }
+
+ halfwayPointOffset = realSourceLength - halfwayPoint; // what remains of source beyond the split point; this is the return value
+ bal_ed = getTwinEdge ( target );
+
+ for ( marker = edge_array[source].rv; marker != NULL; marker = marker->nextOnEdge ) // visit every read interval currently on source
+ {
+ if ( marker->prevInRead && marker->prevInRead->edgeid == target ) // the read already has an interval on target right before this one: no new interval needed
+ {
+ continue;
+ }
+
+ newMarker = allocateRV ( marker->readid, target ); // new interval on target, plus its twin on target's twin edge
+ edge_array[target].rv = addRv ( edge_array[target].rv, newMarker );
+ bal_new = allocateRV ( -marker->readid, bal_ed );
+ edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_new );
+ newMarker->bal_rv = bal_new;
+ bal_new->bal_rv = newMarker;
+ newMarker->start = marker->start; // the new interval begins where marker used to begin
+
+ if ( realSourceLength > 0 )
+ {
+ breakpoint = halfwayPoint + marker->start;
+ }
+ else
+ {
+ breakpoint = marker->start;
+ }
+
+ bal_new->start = breakpoint; // the new interval ends at the breakpoint ...
+ marker->start = breakpoint; // ... and marker now begins there
+ previousMarker = marker->prevInRead;
+ connectInRead ( previousMarker, newMarker ); // splice newMarker into the read chain directly before marker
+ connectInRead ( newMarker, marker );
+ }
+
+ return halfwayPointOffset;
}
static void printKmer ( Kmer kmer )
{
- printKmerSeq ( stderr, kmer );
- fprintf ( stderr, "\n" );
+ printKmerSeq ( stderr, kmer ); // write the kmer to stderr via printKmerSeq (defined elsewhere)
+ fprintf ( stderr, "\n" ); // terminate the line
}
static int splitNodeDescriptor ( unsigned int source, unsigned int target, int offset )
{
- int originalLength = edge_array[source].length;
- int backLength = originalLength - offset;
- int index, seqLen;
- char * tightSeq, nt, *newSeq;
- unsigned int bal_source = getTwinEdge ( source );
- unsigned int bal_target = getTwinEdge ( target );
- edge_array[source].length = offset;
- edge_array[bal_source].length = offset;
- edge_array[source].flag = 1;
- edge_array[bal_source].flag = 1;
-
- if ( target != 0 )
- {
- edge_array[target].length = backLength;
- edge_array[bal_target].length = backLength;
- free ( ( void * ) edge_array[target].seq );
- edge_array[target].seq = NULL;
- free ( ( void * ) edge_array[bal_target].seq );
- edge_array[bal_target].seq = NULL;
- }
-
- if ( backLength == 0 )
- {
- return 0;
- }
-
- tightSeq = edge_array[source].seq;
- seqLen = backLength / 4 + 1;
-
- if ( target != 0 )
- {
- edge_array[target].flag = 1;
- edge_array[bal_target].flag = 1;
- newSeq = ( char * ) ckalloc ( seqLen * sizeof ( char ) );
- edge_array[target].seq = newSeq;
-
- for ( index = 0; index < backLength; index++ )
- {
- nt = getCharInTightString ( tightSeq, index );
- writeChar2tightString ( nt, newSeq, index );
- }
- }
-
- //source node
- for ( index = backLength; index < originalLength; index++ )
- {
- nt = getCharInTightString ( tightSeq, index );
- writeChar2tightString ( nt, tightSeq, index - backLength );
- }
-
- if ( target == 0 )
- {
- return backLength;
- }
-
- //target twin
- tightSeq = edge_array[bal_source].seq;
- newSeq = ( char * ) ckalloc ( seqLen * sizeof ( char ) );
- edge_array[bal_target].seq = newSeq;
-
- for ( index = offset; index < originalLength; index++ )
- {
- nt = getCharInTightString ( tightSeq, index );
- writeChar2tightString ( nt, newSeq, index - offset );
- }
-
- return backLength;
+ int originalLength = edge_array[source].length;
+ int backLength = originalLength - offset; // number of leading characters split off from source; offset characters remain
+ int index, seqLen;
+ char *tightSeq, nt, *newSeq;
+ unsigned int bal_source = getTwinEdge ( source );
+ unsigned int bal_target = getTwinEdge ( target );
+ edge_array[source].length = offset; // source and its twin shrink to offset
+ edge_array[bal_source].length = offset;
+ edge_array[source].flag = 1;
+ edge_array[bal_source].flag = 1;
+
+ if ( target != 0 ) // target == 0 means "no receiving edge": source is only truncated and nothing is copied out
+ {
+ edge_array[target].length = backLength;
+ edge_array[bal_target].length = backLength;
+ free ( ( void * ) edge_array[target].seq ); // drop any sequences target and its twin held before
+ edge_array[target].seq = NULL;
+ free ( ( void * ) edge_array[bal_target].seq );
+ edge_array[bal_target].seq = NULL;
+ }
+
+ if ( backLength == 0 ) // nothing is split off, so no sequence data has to move
+ {
+ return 0;
+ }
+
+ tightSeq = edge_array[source].seq;
+ seqLen = backLength / 4 + 1; // buffer size in bytes -- presumably the tight-string helpers pack four characters per byte, confirm
+
+ if ( target != 0 )
+ {
+ edge_array[target].flag = 1;
+ edge_array[bal_target].flag = 1;
+ newSeq = ( char * ) ckalloc ( seqLen * sizeof ( char ) );
+ edge_array[target].seq = newSeq;
+
+ for ( index = 0; index < backLength; index++ ) // target receives the first backLength characters of source
+ {
+ nt = getCharInTightString ( tightSeq, index );
+ writeChar2tightString ( nt, newSeq, index );
+ }
+ }
+
+ //source node
+ for ( index = backLength; index < originalLength; index++ ) // shift the remaining offset characters to the front of source's buffer, in place
+ {
+ nt = getCharInTightString ( tightSeq, index );
+ writeChar2tightString ( nt, tightSeq, index - backLength );
+ }
+
+ if ( target == 0 )
+ {
+ return backLength;
+ }
+
+ //target twin
+ tightSeq = edge_array[bal_source].seq;
+ newSeq = ( char * ) ckalloc ( seqLen * sizeof ( char ) );
+ edge_array[bal_target].seq = newSeq;
+
+ for ( index = offset; index < originalLength; index++ ) // target's twin receives the last backLength characters of source's twin; the twin's buffer itself is not rewritten
+ {
+ nt = getCharInTightString ( tightSeq, index );
+ writeChar2tightString ( nt, newSeq, index - offset );
+ }
+
+ return backLength; // number of characters moved off source
}
static void remapBackOfNodeDescriptorOntoNeighbour ( unsigned int source, unsigned int target, boolean slowToFast, int offset )
{
- unsigned int bal_source = getTwinEdge ( source );
- unsigned int bal_target = getTwinEdge ( target );
- Kmer source_from_vt_kmer , bal_source_from_vt_kmer;
- Kmer word;
- int index;
- char nt;
- int backlength ;
-
- if ( slowToFast )
- {
- backlength = splitNodeDescriptor ( source, 0, offset );
- edge_array[source].from_vt = edge_array[target].to_vt;
- edge_array[bal_source].to_vt = edge_array[bal_target].from_vt;
- }
- else
- {
- backlength = splitNodeDescriptor ( source, target, offset );
- source_from_vt_kmer = vt_array[edge_array[source].from_vt].kmer;
- bal_source_from_vt_kmer = vt_array[edge_array[bal_source].to_vt].kmer;
- edge_array[target].from_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = source_from_vt_kmer;
- edge_array[bal_target].to_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = bal_source_from_vt_kmer;
- word = vt_array[edge_array[target].from_vt].kmer;
-
- for ( index = 0; index < backlength; index++ )
- {
- nt = getCharInTightString ( edge_array[target].seq, index );
- word = nextKmer ( word, nt );
- }
-
- edge_array[target].to_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = word;
- edge_array[source].from_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = word;
- word = vt_array[edge_array[bal_source].from_vt].kmer;
-
- for ( index = 0; index < offset; index++ )
- {
- nt = getCharInTightString ( edge_array[bal_source].seq, index );
- word = nextKmer ( word, nt );
- }
-
- edge_array[bal_target].from_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = word;
- edge_array[bal_source].to_vt = new_num_vt;
-
- if ( new_num_vt + 1 > num_kmer_limit )
- {
- fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
- exit ( -1 );
- }
-
- vt_array[new_num_vt++].kmer = word;
- }
+ unsigned int bal_source = getTwinEdge ( source );
+ unsigned int bal_target = getTwinEdge ( target );
+ Kmer source_from_vt_kmer , bal_source_from_vt_kmer;
+ Kmer word;
+ int index;
+ char nt;
+ int backlength ;
+
+ if ( slowToFast ) // target keeps its own descriptor: source is only truncated and re-attached to target's end vertex
+ {
+ backlength = splitNodeDescriptor ( source, 0, offset ); // target argument 0: nothing is copied out of source; the result is not used in this branch
+ edge_array[source].from_vt = edge_array[target].to_vt;
+ edge_array[bal_source].to_vt = edge_array[bal_target].from_vt;
+ }
+ else // target takes over the split-off part of source; six new vertex entries are appended to vt_array below
+ {
+ backlength = splitNodeDescriptor ( source, target, offset );
+ source_from_vt_kmer = vt_array[edge_array[source].from_vt].kmer; // kmers of source's old boundary vertices, captured before the indices are reassigned
+ bal_source_from_vt_kmer = vt_array[edge_array[bal_source].to_vt].kmer;
+ edge_array[target].from_vt = new_num_vt; // new vertex #1: target starts at a copy of source's old start vertex
+
+ if ( new_num_vt + 1 > num_kmer_limit ) // capacity check before each vt_array write; exits the process on overflow
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = source_from_vt_kmer;
+ edge_array[bal_target].to_vt = new_num_vt; // new vertex #2: target's twin ends at a copy of the old end vertex of source's twin
+
+ if ( new_num_vt + 1 > num_kmer_limit )
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = bal_source_from_vt_kmer;
+ word = vt_array[edge_array[target].from_vt].kmer;
+
+ for ( index = 0; index < backlength; index++ ) // advance the start kmer through target's sequence to obtain the kmer at target's end
+ {
+ nt = getCharInTightString ( edge_array[target].seq, index );
+ word = nextKmer ( word, nt );
+ }
+
+ edge_array[target].to_vt = new_num_vt; // new vertex #3: end of target
+
+ if ( new_num_vt + 1 > num_kmer_limit )
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = word;
+ edge_array[source].from_vt = new_num_vt; // new vertex #4: start of the shortened source, holding the same kmer as #3
+
+ if ( new_num_vt + 1 > num_kmer_limit )
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = word;
+ word = vt_array[edge_array[bal_source].from_vt].kmer;
+
+ for ( index = 0; index < offset; index++ ) // advance through the first offset characters of source's twin to obtain the kmer at its new end
+ {
+ nt = getCharInTightString ( edge_array[bal_source].seq, index );
+ word = nextKmer ( word, nt );
+ }
+
+ edge_array[bal_target].from_vt = new_num_vt; // new vertex #5: start of target's twin
+
+ if ( new_num_vt + 1 > num_kmer_limit )
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = word;
+ edge_array[bal_source].to_vt = new_num_vt; // new vertex #6: end of the shortened twin of source, holding the same kmer as #5
+
+ if ( new_num_vt + 1 > num_kmer_limit )
+ {
+ fprintf ( stderr, "Error : Number of vertex is out of range.\n" );
+ exit ( -1 );
+ }
+
+ vt_array[new_num_vt++].kmer = word;
+ }
}
static void remapBackOfNodeTimesOntoNeighbour ( unsigned int source, unsigned int target )
{
- Time targetTime = times[target];
- Time nodeTime = times[source];
- unsigned int twinTarget = getTwinEdge ( target );
- unsigned int twinSource = getTwinEdge ( source );
- unsigned int previousNode;
-
- if ( nodeTime != -1 )
- {
- previousNode = previous[source];
-
- if ( previousNode == source )
- {
- times[target] = nodeTime;
- previous[target] = target;
- }
- else if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
- {
- times[target] = nodeTime;
-
- if ( previousNode != twinSource )
- {
- previous[target] = previousNode;
- }
- else
- {
- previous[target] = twinTarget;
- }
- }
-
- previous[source] = target;
- }
-
- targetTime = times[twinTarget];
- nodeTime = times[twinSource];
-
- if ( nodeTime != -1 )
- {
- if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( twinTarget, twinSource ) ) )
- {
- times[twinTarget] = nodeTime;
- previous[twinTarget] = twinSource;
- }
- }
-
- remapNodeInwardReferencesOntoNode ( twinSource, twinTarget );
+ Time targetTime = times[target];
+ Time nodeTime = times[source];
+ unsigned int twinTarget = getTwinEdge ( target );
+ unsigned int twinSource = getTwinEdge ( source );
+ unsigned int previousNode;
+
+ if ( nodeTime != -1 )
+ {
+ previousNode = previous[source];
+
+ if ( previousNode == source )
+ {
+ times[target] = nodeTime;
+ previous[target] = target;
+ }
+ else if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
+ {
+ times[target] = nodeTime;
+
+ if ( previousNode != twinSource )
+ {
+ previous[target] = previousNode;
+ }
+ else
+ {
+ previous[target] = twinTarget;
+ }
+ }
+
+ previous[source] = target;
+ }
+
+ targetTime = times[twinTarget];
+ nodeTime = times[twinSource];
+
+ if ( nodeTime != -1 )
+ {
+ if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( twinTarget, twinSource ) ) )
+ {
+ times[twinTarget] = nodeTime;
+ previous[twinTarget] = twinSource;
+ }
+ }
+
+ remapNodeInwardReferencesOntoNode ( twinSource, twinTarget );
}
static void remapBackOfNodeArcsOntoNeighbour ( unsigned int source, unsigned int target )
{
- ARC * arc;
- remapNodeArcsOntoTarget ( getTwinEdge ( source ), getTwinEdge ( target ) );
+ ARC *arc;
+ remapNodeArcsOntoTarget ( getTwinEdge ( source ), getTwinEdge ( target ) );
- for ( arc = edge_array[source].arcs; arc != NULL; arc = arc->next )
- {
- createAnalogousArc ( target, source, arc );
- }
+ for ( arc = edge_array[source].arcs; arc != NULL; arc = arc->next )
+ {
+ createAnalogousArc ( target, source, arc );
+ }
}
/*************************************************
@@ -1552,22 +1552,22 @@ Output:
Return:
None.
*************************************************/
-static void remapBackOfNodeOntoNeighbour ( unsigned int source, READINTERVAL * sourceMarker, unsigned int target, READINTERVAL * targetMarker, boolean slowToFast )
+static void remapBackOfNodeOntoNeighbour ( unsigned int source, READINTERVAL *sourceMarker, unsigned int target, READINTERVAL *targetMarker, boolean slowToFast )
{
- int offset;
- offset = remapBackOfNodeMarkersOntoNeighbour ( source, sourceMarker, target, targetMarker, slowToFast );
- remapBackOfNodeDescriptorOntoNeighbour ( source, target, slowToFast, offset );
- combineCOV ( source, edge_array[source].length, target, edge_array[target].length );
- remapBackOfNodeTimesOntoNeighbour ( source, target );
- remapBackOfNodeArcsOntoNeighbour ( source, target );
- remapNodeFibHeapReferencesOntoNode ( getTwinEdge ( source ), getTwinEdge ( target ) );
- //why not "remapNodeFibHeapReferencesOntoNode(source,target);"
- //because the downstream part of source still retains, which can serve as previousNode as before
-
- if ( getTwinEdge ( source ) == startingNode )
- {
- startingNode = getTwinEdge ( target );
- }
+ int offset;
+ offset = remapBackOfNodeMarkersOntoNeighbour ( source, sourceMarker, target, targetMarker, slowToFast );
+ remapBackOfNodeDescriptorOntoNeighbour ( source, target, slowToFast, offset );
+ combineCOV ( source, edge_array[source].length, target, edge_array[target].length );
+ remapBackOfNodeTimesOntoNeighbour ( source, target );
+ remapBackOfNodeArcsOntoNeighbour ( source, target );
+ remapNodeFibHeapReferencesOntoNode ( getTwinEdge ( source ), getTwinEdge ( target ) );
+ //Why not "remapNodeFibHeapReferencesOntoNode(source,target);"?
+ //Because the downstream part of source is retained and can still serve as previousNode as before.
+
+ if ( getTwinEdge ( source ) == startingNode )
+ {
+ startingNode = getTwinEdge ( target );
+ }
}
/*************************************************
@@ -1583,17 +1583,17 @@ Output:
Return:
True if the edge is on the path.
*************************************************/
-static boolean markerLeadsToNode ( READINTERVAL * marker, unsigned int node )
+static boolean markerLeadsToNode ( READINTERVAL *marker, unsigned int node )
{
- READINTERVAL * currentMarker;
+ READINTERVAL *currentMarker;
- for ( currentMarker = marker; currentMarker != NULL; currentMarker = currentMarker->nextInRead )
- if ( currentMarker->edgeid == node )
- {
- return true;
- }
+ for ( currentMarker = marker; currentMarker != NULL; currentMarker = currentMarker->nextInRead )
+ if ( currentMarker->edgeid == node )
+ {
+ return true;
+ }
- return false;
+ return false;
}
/*************************************************
@@ -1610,9 +1610,9 @@ Return:
*************************************************/
static void reduceNode ( unsigned int node )
{
- unsigned int bal_ed = getTwinEdge ( node );
- edge_array[node].length = 0;
- edge_array[bal_ed].length = 0;
+ unsigned int bal_ed = getTwinEdge ( node );
+ edge_array[node].length = 0;
+ edge_array[bal_ed].length = 0;
}
/*************************************************
@@ -1628,191 +1628,191 @@ Output:
Return:
None.
*************************************************/
-static void reduceSlowNodes ( READINTERVAL * slowMarker, unsigned int finish )
+static void reduceSlowNodes ( READINTERVAL *slowMarker, unsigned int finish )
{
- READINTERVAL * marker;
+ READINTERVAL *marker;
- for ( marker = slowMarker; marker->edgeid != finish; marker = marker->nextInRead )
- {
- reduceNode ( marker->edgeid );
- }
+ for ( marker = slowMarker; marker->edgeid != finish; marker = marker->nextInRead )
+ {
+ reduceNode ( marker->edgeid );
+ }
}
-static boolean markerLeadsToArc ( READINTERVAL * marker, unsigned int nodeA, unsigned int nodeB )
+static boolean markerLeadsToArc ( READINTERVAL *marker, unsigned int nodeA, unsigned int nodeB )
 {
- READINTERVAL * current, *next;
- unsigned int twinA = getTwinEdge ( nodeA );
- unsigned int twinB = getTwinEdge ( nodeB );
- current = marker;
-
- while ( current != NULL )
- {
- next = current->nextInRead;
-
- if ( current->edgeid == nodeA && next->edgeid == nodeB )
- {
- return true;
- }
+ READINTERVAL *current, *next;
+ unsigned int twinA = getTwinEdge ( nodeA );
+ unsigned int twinB = getTwinEdge ( nodeB );
+ current = marker;
+ //Return true if two consecutive markers on this path step nodeA->nodeB, or twinB->twinA; false otherwise.
+ while ( current != NULL && current->nextInRead != NULL ) //the last marker has no successor: stop before dereferencing a NULL next
+ {
+ next = current->nextInRead; //non-NULL, guaranteed by the loop condition
+
+ if ( current->edgeid == nodeA && next->edgeid == nodeB )
+ {
+ return true;
+ }
- if ( current->edgeid == twinB && next->edgeid == twinA )
- {
- return true;
- }
+ if ( current->edgeid == twinB && next->edgeid == twinA )
+ {
+ return true;
+ }
- current = next;
- }
+ current = next;
+ }
- return false;
+ return false;
 }
-static void remapEmptyPathArcsOntoMiddlePathSimple ( READINTERVAL * emptyPath, READINTERVAL * targetPath )
+static void remapEmptyPathArcsOntoMiddlePathSimple ( READINTERVAL *emptyPath, READINTERVAL *targetPath )
{
- READINTERVAL * pathMarker, *marker;
- unsigned int start = emptyPath->prevInRead->edgeid;
- unsigned int finish = emptyPath->edgeid;
- unsigned int previousNode = start;
- unsigned int currentNode;
- ARC * originalArc = getArcBetween ( start, finish );
-
- if ( !originalArc )
- {
- fprintf ( stderr, "RemapEmptyPathArcsOntoMiddlePathSimple: no arc between %d and %d.\n", start, finish );
- marker = fastPath;
- fprintf ( stderr, "Fast path: " );
-
- while ( marker )
- {
- fprintf ( stderr, "%d,", marker->edgeid );
- marker = marker->nextInRead;
- }
-
- fprintf ( stderr, "\n" );
- marker = slowPath;
- fprintf ( stderr, "Slow path: " );
-
- while ( marker )
- {
- fprintf ( stderr, "%d,", marker->edgeid );
- marker = marker->nextInRead;
- }
-
- fprintf ( stderr, "\n" );
- }
-
- for ( pathMarker = targetPath; pathMarker->edgeid != finish; pathMarker = pathMarker->nextInRead )
- {
- currentNode = pathMarker->edgeid;
- createAnalogousArc ( previousNode, currentNode, originalArc );
- previousNode = currentNode;
- }
-
- createAnalogousArc ( previousNode, finish, originalArc );
- destroyArc ( start, originalArc );
+ READINTERVAL *pathMarker, *marker;
+ unsigned int start = emptyPath->prevInRead->edgeid;
+ unsigned int finish = emptyPath->edgeid;
+ unsigned int previousNode = start;
+ unsigned int currentNode;
+ ARC *originalArc = getArcBetween ( start, finish );
+
+ if ( !originalArc )
+ {
+ fprintf ( stderr, "RemapEmptyPathArcsOntoMiddlePathSimple: no arc between %d and %d.\n", start, finish );
+ marker = fastPath;
+ fprintf ( stderr, "Fast path: " );
+
+ while ( marker )
+ {
+ fprintf ( stderr, "%d,", marker->edgeid );
+ marker = marker->nextInRead;
+ }
+
+ fprintf ( stderr, "\n" );
+ marker = slowPath;
+ fprintf ( stderr, "Slow path: " );
+
+ while ( marker )
+ {
+ fprintf ( stderr, "%d,", marker->edgeid );
+ marker = marker->nextInRead;
+ }
+
+ fprintf ( stderr, "\n" );
+ }
+
+ for ( pathMarker = targetPath; pathMarker->edgeid != finish; pathMarker = pathMarker->nextInRead )
+ {
+ currentNode = pathMarker->edgeid;
+ createAnalogousArc ( previousNode, currentNode, originalArc );
+ previousNode = currentNode;
+ }
+
+ createAnalogousArc ( previousNode, finish, originalArc );
+ destroyArc ( start, originalArc );
}
-static void remapEmptyPathMarkersOntoMiddlePathSimple ( READINTERVAL * emptyPath, READINTERVAL * targetPath, boolean slowToFast )
+static void remapEmptyPathMarkersOntoMiddlePathSimple ( READINTERVAL *emptyPath, READINTERVAL *targetPath, boolean slowToFast )
{
- READINTERVAL * marker, *newMarker, *previousMarker, *pathMarker, *bal_marker;
- unsigned int start = emptyPath->prevInRead->edgeid;
- unsigned int finish = emptyPath->edgeid;
- unsigned int markerStart, bal_ed;
- READINTERVAL * oldMarker = edge_array[finish].rv;
-
- while ( oldMarker )
- {
- marker = oldMarker;
- oldMarker = marker->nextOnEdge;
- newMarker = marker->prevInRead;
-
- if ( newMarker->edgeid != start )
- {
- continue;
- }
-
- if ( ( slowToFast && marker->readid != 2 ) || ( !slowToFast && marker->readid != 1 ) )
- {
- continue;
- }
-
- markerStart = marker->start;
-
- for ( pathMarker = targetPath; pathMarker->edgeid != finish; pathMarker = pathMarker->nextInRead )
- {
- previousMarker = newMarker;
- //maker a new marker
- newMarker = allocateRV ( marker->readid, pathMarker->edgeid );
- newMarker->start = markerStart;
- edge_array[pathMarker->edgeid].rv = addRv ( edge_array[pathMarker->edgeid].rv, newMarker );
- //maker the twin marker
- bal_ed = getTwinEdge ( pathMarker->edgeid );
- bal_marker = allocateRV ( -marker->readid, bal_ed );
- bal_marker->start = markerStart;
- edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_marker );
- newMarker->bal_rv = bal_marker;
- bal_marker->bal_rv = newMarker;
- connectInRead ( previousMarker, newMarker );
- }
-
- connectInRead ( newMarker, marker );
- }
+ READINTERVAL *marker, *newMarker, *previousMarker, *pathMarker, *bal_marker;
+ unsigned int start = emptyPath->prevInRead->edgeid;
+ unsigned int finish = emptyPath->edgeid;
+ unsigned int markerStart, bal_ed;
+ READINTERVAL *oldMarker = edge_array[finish].rv;
+
+ while ( oldMarker )
+ {
+ marker = oldMarker;
+ oldMarker = marker->nextOnEdge;
+ newMarker = marker->prevInRead;
+
+ if ( newMarker->edgeid != start )
+ {
+ continue;
+ }
+
+ if ( ( slowToFast && marker->readid != 2 ) || ( !slowToFast && marker->readid != 1 ) )
+ {
+ continue;
+ }
+
+ markerStart = marker->start;
+
+ for ( pathMarker = targetPath; pathMarker->edgeid != finish; pathMarker = pathMarker->nextInRead )
+ {
+ previousMarker = newMarker;
+ //make a new marker
+ newMarker = allocateRV ( marker->readid, pathMarker->edgeid );
+ newMarker->start = markerStart;
+ edge_array[pathMarker->edgeid].rv = addRv ( edge_array[pathMarker->edgeid].rv, newMarker );
+ //make the twin marker
+ bal_ed = getTwinEdge ( pathMarker->edgeid );
+ bal_marker = allocateRV ( -marker->readid, bal_ed );
+ bal_marker->start = markerStart;
+ edge_array[bal_ed].rv = addRv ( edge_array[bal_ed].rv, bal_marker );
+ newMarker->bal_rv = bal_marker;
+ bal_marker->bal_rv = newMarker;
+ connectInRead ( previousMarker, newMarker );
+ }
+
+ connectInRead ( newMarker, marker );
+ }
}
-static void remapNodeTimesOntoForwardMiddlePath ( unsigned int source, READINTERVAL * path )
+static void remapNodeTimesOntoForwardMiddlePath ( unsigned int source, READINTERVAL *path )
{
- READINTERVAL * marker;
- unsigned int target;
- Time nodeTime = times[source];
- unsigned int previousNode = previous[source];
- Time targetTime;
-
- for ( marker = path; marker->edgeid != source; marker = marker->nextInRead )
- {
- target = marker->edgeid;
- targetTime = times[target];
-
- if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
- {
- times[target] = nodeTime;
- previous[target] = previousNode;
- }
-
- previousNode = target;
- }
-
- previous[source] = previousNode;
+ READINTERVAL *marker;
+ unsigned int target;
+ Time nodeTime = times[source];
+ unsigned int previousNode = previous[source];
+ Time targetTime;
+
+ for ( marker = path; marker->edgeid != source; marker = marker->nextInRead )
+ {
+ target = marker->edgeid;
+ targetTime = times[target];
+
+ if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
+ {
+ times[target] = nodeTime;
+ previous[target] = previousNode;
+ }
+
+ previousNode = target;
+ }
+
+ previous[source] = previousNode;
}
-static void remapNodeTimesOntoTwinMiddlePath ( unsigned int source, READINTERVAL * path )
+static void remapNodeTimesOntoTwinMiddlePath ( unsigned int source, READINTERVAL *path )
{
- READINTERVAL * marker;
- unsigned int target;
- unsigned int previousNode = getTwinEdge ( source );
- Time targetTime;
- READINTERVAL * limit = path->prevInRead->bal_rv;
- Time nodeTime = times[limit->edgeid];
- marker = path;
-
- while ( marker->edgeid != source )
- {
- marker = marker->nextInRead;
- }
-
- marker = marker->bal_rv;
-
- while ( marker != limit )
- {
- marker = marker->nextInRead;
- target = marker->edgeid;
- targetTime = times[target];
-
- if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
- {
- times[target] = nodeTime;
- previous[target] = previousNode;
- }
-
- previousNode = target;
- }
+ READINTERVAL *marker;
+ unsigned int target;
+ unsigned int previousNode = getTwinEdge ( source );
+ Time targetTime;
+ READINTERVAL *limit = path->prevInRead->bal_rv;
+ Time nodeTime = times[limit->edgeid];
+ marker = path;
+
+ while ( marker->edgeid != source )
+ {
+ marker = marker->nextInRead;
+ }
+
+ marker = marker->bal_rv;
+
+ while ( marker != limit )
+ {
+ marker = marker->nextInRead;
+ target = marker->edgeid;
+ targetTime = times[target];
+
+ if ( targetTime == -1 || targetTime > nodeTime || ( targetTime == nodeTime && !isPreviousToNode ( target, previousNode ) ) )
+ {
+ times[target] = nodeTime;
+ previous[target] = previousNode;
+ }
+
+ previousNode = target;
+ }
}
/*************************************************
@@ -1829,29 +1829,29 @@ Output:
Return:
None.
*************************************************/
-static void remapEmptyPathOntoMiddlePath ( READINTERVAL * emptyPath, READINTERVAL * targetPath, boolean slowToFast )
+static void remapEmptyPathOntoMiddlePath ( READINTERVAL *emptyPath, READINTERVAL *targetPath, boolean slowToFast )
{
- unsigned int start = emptyPath->prevInRead->edgeid;
- unsigned int finish = emptyPath->edgeid;
-
- // Remapping markers
- if ( !markerLeadsToArc ( targetPath, start, finish ) )
- {
- remapEmptyPathArcsOntoMiddlePathSimple ( emptyPath, targetPath );
- }
-
- remapEmptyPathMarkersOntoMiddlePathSimple ( emptyPath, targetPath, slowToFast );
-
- //Remap times and previous(if necessary)
- if ( getNodePrevious ( finish ) == start )
- {
- remapNodeTimesOntoForwardMiddlePath ( finish, targetPath );
- }
-
- if ( getNodePrevious ( getTwinEdge ( start ) ) == getTwinEdge ( finish ) )
- {
- remapNodeTimesOntoTwinMiddlePath ( finish, targetPath );
- }
+ unsigned int start = emptyPath->prevInRead->edgeid;
+ unsigned int finish = emptyPath->edgeid;
+
+ // Remapping markers
+ if ( !markerLeadsToArc ( targetPath, start, finish ) )
+ {
+ remapEmptyPathArcsOntoMiddlePathSimple ( emptyPath, targetPath );
+ }
+
+ remapEmptyPathMarkersOntoMiddlePathSimple ( emptyPath, targetPath, slowToFast );
+
+ //Remap times and previous(if necessary)
+ if ( getNodePrevious ( finish ) == start )
+ {
+ remapNodeTimesOntoForwardMiddlePath ( finish, targetPath );
+ }
+
+ if ( getNodePrevious ( getTwinEdge ( start ) ) == getTwinEdge ( finish ) )
+ {
+ remapNodeTimesOntoTwinMiddlePath ( finish, targetPath );
+ }
}
/*************************************************
@@ -1868,140 +1868,146 @@ Return:
*************************************************/
static boolean cleanUpRedundancy ()
{
- READINTERVAL * slowMarker = slowPath->nextInRead, *fastMarker = fastPath->nextInRead;
- unsigned int slowNode, fastNode;
- int slowLength, fastLength;
- int fastConstraint = 0;
- int slowConstraint = 0;
- int finalLength;
- attachPath ( slowPath );
- attachPath ( fastPath );
- mapSlowOntoFast ();
- finalLength = mapDistancesOntoPaths ();
- slowLength = fastLength = 0;
-
- while ( slowMarker != NULL && fastMarker != NULL )
- {
- if ( !slowMarker->nextInRead )
- {
- slowLength = finalLength;
- }
- else
- {
- slowLength = slowToFastMapping[slowMarker->bal_rv->start - 1];
-
- if ( slowLength < slowConstraint )
- {
- slowLength = slowConstraint;
- }
- }
-
- fastLength = fastMarker->bal_rv->start - 1;
-
- if ( fastLength < fastConstraint )
- {
- fastLength = fastConstraint;
- }
-
- slowNode = slowMarker->edgeid;
- fastNode = fastMarker->edgeid;
-
- if ( false )
- { fprintf ( stderr, "Slow %d Fast %d.\n", slowLength, fastLength ); }
-
- if ( slowNode == fastNode )
- {
- if ( false )
- { fprintf ( stderr, "0/ Already merged together %d == %d.\n", slowNode, fastNode ); }
-
- if ( fastLength > slowLength )
- {
- slowConstraint = fastLength;
- }
-
- fastConstraint = slowLength;
- slowMarker = slowMarker->nextInRead;
- fastMarker = fastMarker->nextInRead;
- }
- else if ( slowNode == getTwinEdge ( fastNode ) )
- {
- if ( false )
- { fprintf ( stderr, "1/ Creme de la hairpin %d $$ %d.\n", slowNode, fastNode ); }
-
- if ( fastLength > slowLength )
- {
- slowConstraint = fastLength;
- }
-
- fastConstraint = slowLength;
- slowMarker = slowMarker->nextInRead;
- fastMarker = fastMarker->nextInRead;
- }
- else if ( markerLeadsToNode ( slowMarker, fastNode ) )
- {
- if ( false )
- {
- fprintf ( stderr, "2/ Remapping empty fast arc onto slow nodes.\n" );
- }
-
- reduceSlowNodes ( slowMarker, fastNode );
- remapEmptyPathOntoMiddlePath ( fastMarker, slowMarker, FAST_TO_SLOW );
-
- while ( slowMarker->edgeid != fastNode )
- {
- slowMarker = slowMarker->nextInRead;
- }
- }
- else if ( markerLeadsToNode ( fastMarker, slowNode ) )
- {
- if ( false )
- {
- fprintf ( stderr, "3/ Remapping empty slow arc onto fast nodes.\n" );
- }
-
- remapEmptyPathOntoMiddlePath ( slowMarker, fastMarker, SLOW_TO_FAST );
-
- while ( fastMarker->edgeid != slowNode )
- {
- fastMarker = fastMarker->nextInRead;
- }
- }
- else if ( slowLength == fastLength )
- {
- if ( false )
- {
- fprintf ( stderr, "A/ Mapped equivalent nodes together %d <=> %d.\n", slowNode, fastNode );
- }
-
- remapNodeOntoNeighbour ( slowNode, fastNode );
- slowMarker = slowMarker->nextInRead;
- fastMarker = fastMarker->nextInRead;
- }
- else if ( slowLength < fastLength )
- {
- if ( false )
- {
- fprintf ( stderr, "B/ Mapped back of fast node into slow %d -> %d.\n", fastNode, slowNode );
- }
-
- remapBackOfNodeOntoNeighbour ( fastNode, fastMarker, slowNode, slowMarker, FAST_TO_SLOW );
- slowMarker = slowMarker->nextInRead;
- }
- else
- {
- if ( false )
- {
- fprintf ( stderr, "C/ Mapped back of slow node into fast %d -> %d.\n", slowNode, fastNode );
- }
-
- remapBackOfNodeOntoNeighbour ( slowNode, slowMarker, fastNode, fastMarker, SLOW_TO_FAST );
- fastMarker = fastMarker->nextInRead;
- }
- }
-
- detachPath ( fastPath );
- detachPath ( slowPath );
- return 1;
+ READINTERVAL *slowMarker = slowPath->nextInRead, *fastMarker = fastPath->nextInRead;
+ unsigned int slowNode, fastNode;
+ int slowLength, fastLength;
+ int fastConstraint = 0;
+ int slowConstraint = 0;
+ int finalLength;
+ attachPath ( slowPath );
+ attachPath ( fastPath );
+ mapSlowOntoFast ();
+ finalLength = mapDistancesOntoPaths ();
+ slowLength = fastLength = 0;
+
+ while ( slowMarker != NULL && fastMarker != NULL )
+ {
+ if ( !slowMarker->nextInRead )
+ {
+ slowLength = finalLength;
+ }
+ else
+ {
+ slowLength = slowToFastMapping[slowMarker->bal_rv->start - 1];
+
+ if ( slowLength < slowConstraint )
+ {
+ slowLength = slowConstraint;
+ }
+ }
+
+ fastLength = fastMarker->bal_rv->start - 1;
+
+ if ( fastLength < fastConstraint )
+ {
+ fastLength = fastConstraint;
+ }
+
+ slowNode = slowMarker->edgeid;
+ fastNode = fastMarker->edgeid;
+
+ if ( false )
+ {
+ fprintf ( stderr, "Slow %d Fast %d.\n", slowLength, fastLength );
+ }
+
+ if ( slowNode == fastNode )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "0/ Already merged together %d == %d.\n", slowNode, fastNode );
+ }
+
+ if ( fastLength > slowLength )
+ {
+ slowConstraint = fastLength;
+ }
+
+ fastConstraint = slowLength;
+ slowMarker = slowMarker->nextInRead;
+ fastMarker = fastMarker->nextInRead;
+ }
+ else if ( slowNode == getTwinEdge ( fastNode ) )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "1/ Creme de la hairpin %d $$ %d.\n", slowNode, fastNode );
+ }
+
+ if ( fastLength > slowLength )
+ {
+ slowConstraint = fastLength;
+ }
+
+ fastConstraint = slowLength;
+ slowMarker = slowMarker->nextInRead;
+ fastMarker = fastMarker->nextInRead;
+ }
+ else if ( markerLeadsToNode ( slowMarker, fastNode ) )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "2/ Remapping empty fast arc onto slow nodes.\n" );
+ }
+
+ reduceSlowNodes ( slowMarker, fastNode );
+ remapEmptyPathOntoMiddlePath ( fastMarker, slowMarker, FAST_TO_SLOW );
+
+ while ( slowMarker->edgeid != fastNode )
+ {
+ slowMarker = slowMarker->nextInRead;
+ }
+ }
+ else if ( markerLeadsToNode ( fastMarker, slowNode ) )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "3/ Remapping empty slow arc onto fast nodes.\n" );
+ }
+
+ remapEmptyPathOntoMiddlePath ( slowMarker, fastMarker, SLOW_TO_FAST );
+
+ while ( fastMarker->edgeid != slowNode )
+ {
+ fastMarker = fastMarker->nextInRead;
+ }
+ }
+ else if ( slowLength == fastLength )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "A/ Mapped equivalent nodes together %d <=> %d.\n", slowNode, fastNode );
+ }
+
+ remapNodeOntoNeighbour ( slowNode, fastNode );
+ slowMarker = slowMarker->nextInRead;
+ fastMarker = fastMarker->nextInRead;
+ }
+ else if ( slowLength < fastLength )
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "B/ Mapped back of fast node into slow %d -> %d.\n", fastNode, slowNode );
+ }
+
+ remapBackOfNodeOntoNeighbour ( fastNode, fastMarker, slowNode, slowMarker, FAST_TO_SLOW );
+ slowMarker = slowMarker->nextInRead;
+ }
+ else
+ {
+ if ( false )
+ {
+ fprintf ( stderr, "C/ Mapped back of slow node into fast %d -> %d.\n", slowNode, fastNode );
+ }
+
+ remapBackOfNodeOntoNeighbour ( slowNode, slowMarker, fastNode, fastMarker, SLOW_TO_FAST );
+ fastMarker = fastMarker->nextInRead;
+ }
+ }
+
+ detachPath ( fastPath );
+ detachPath ( slowPath );
+ return 1;
}
/*************************************************
@@ -2021,160 +2027,160 @@ Return:
*************************************************/
static void comparePaths ( unsigned int destination, unsigned int origin )
{
- int slowLength, fastLength, i;
- unsigned int fastNode, slowNode;
- READINTERVAL * marker;
- slowLength = fastLength = 0;
- fastNode = destination;
- slowNode = origin;
- btCounter++;
-
- while ( fastNode != slowNode )
- {
- if ( times[fastNode] > times[slowNode] )
- {
- fastLength++;
- fastNode = previous[fastNode];
- }
- else if ( times[fastNode] < times[slowNode] )
- {
- slowLength++;
- slowNode = previous[slowNode];
- }
- else if ( isPreviousToNode ( slowNode, fastNode ) )
- {
- while ( fastNode != slowNode )
- {
- fastLength++;
- fastNode = previous[fastNode];
- }
- }
- else if ( isPreviousToNode ( fastNode, slowNode ) )
- {
- while ( slowNode != fastNode )
- {
- slowLength++;
- slowNode = previous[slowNode];
- }
- }
- else
- {
- fastLength++;
- fastNode = previous[fastNode];
- slowLength++;
- slowNode = previous[slowNode];
- }
-
- if ( slowLength > MAXNODELENGTH || fastLength > MAXNODELENGTH )
- {
- return;
- }
- }
-
- if ( fastLength == 0 )
- {
- return;
- }
-
- marker = allocateRV ( 1, destination );
- fastPath = marker;
-
- for ( i = 0; i < fastLength; i++ )
- {
- marker = allocateRV ( 1, previous[fastPath->edgeid] );
- marker->nextInRead = fastPath;
- fastPath->prevInRead = marker;
- fastPath = marker;
- }
-
- marker = allocateRV ( 2, destination );
- slowPath = marker;
- marker = allocateRV ( 2, origin );
- marker->nextInRead = slowPath;
- slowPath->prevInRead = marker;
- slowPath = marker;
-
- for ( i = 0; i < slowLength; i++ )
- {
- marker = allocateRV ( 2, previous[slowPath->edgeid] );
- marker->nextInRead = slowPath;
- slowPath->prevInRead = marker;
- slowPath = marker;
- }
-
- fastSeqLength = extractSequence ( fastPath, fastSequence );
- slowSeqLength = extractSequence ( slowPath, slowSequence );
-
- /*
- if(destination==6359){
- printf("destination %d, slowLength %d, fastLength %d\n",destination,slowLength,fastLength);
- printf("fastSeqLength %d, slowSeqLength %d\n",fastSeqLength,slowSeqLength);
- }
- */
- if ( !fastSeqLength || !slowSeqLength )
- {
- detachPathSingle ( slowPath );
- detachPathSingle ( fastPath );
- return;
- }
-
- cmpCounter++;
-
- if ( !compareSequences ( fastSequence, slowSequence, fastSeqLength, slowSeqLength ) )
- {
- detachPathSingle ( slowPath );
- detachPathSingle ( fastPath );
- return;
- }
-
- //only merge clean bubble ...
- if ( clean )
- {
- unsigned int bal_ed;
- unsigned int arcRight_n, arcLeft_n;
- READINTERVAL * tmp;
- tmp = fastPath->nextInRead;
-
- while ( tmp->nextInRead )
- {
- bal_ed = getTwinEdge ( tmp->edgeid );
- arcCount ( tmp->edgeid, &arcRight_n );
- arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcRight_n != 1 || arcLeft_n != 1 ) //not clean bubble
- {
- return;
- }
-
- tmp = tmp->nextInRead;
- }
-
- tmp = slowPath->nextInRead;
-
- while ( tmp->nextInRead )
- {
- bal_ed = getTwinEdge ( tmp->edgeid );
- arcCount ( tmp->edgeid, &arcRight_n );
- arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcRight_n != 1 || arcLeft_n != 1 ) //not clean bubble
- {
- return;
- }
-
- tmp = tmp->nextInRead;
- }
- }
-
- simiCounter++;
- pinCounter += cleanUpRedundancy ();
-
- if ( pinCounter % 100000 == 0 )
- {
- fprintf ( stderr, ".............%lld bubbles merged.\n", pinCounter );
- }
-
- HasChanged = 1;
+ int slowLength, fastLength, i;
+ unsigned int fastNode, slowNode;
+ READINTERVAL *marker;
+ slowLength = fastLength = 0;
+ fastNode = destination;
+ slowNode = origin;
+ btCounter++;
+
+ while ( fastNode != slowNode )
+ {
+ if ( times[fastNode] > times[slowNode] )
+ {
+ fastLength++;
+ fastNode = previous[fastNode];
+ }
+ else if ( times[fastNode] < times[slowNode] )
+ {
+ slowLength++;
+ slowNode = previous[slowNode];
+ }
+ else if ( isPreviousToNode ( slowNode, fastNode ) )
+ {
+ while ( fastNode != slowNode )
+ {
+ fastLength++;
+ fastNode = previous[fastNode];
+ }
+ }
+ else if ( isPreviousToNode ( fastNode, slowNode ) )
+ {
+ while ( slowNode != fastNode )
+ {
+ slowLength++;
+ slowNode = previous[slowNode];
+ }
+ }
+ else
+ {
+ fastLength++;
+ fastNode = previous[fastNode];
+ slowLength++;
+ slowNode = previous[slowNode];
+ }
+
+ if ( slowLength > MAXNODELENGTH || fastLength > MAXNODELENGTH )
+ {
+ return;
+ }
+ }
+
+ if ( fastLength == 0 )
+ {
+ return;
+ }
+
+ marker = allocateRV ( 1, destination );
+ fastPath = marker;
+
+ for ( i = 0; i < fastLength; i++ )
+ {
+ marker = allocateRV ( 1, previous[fastPath->edgeid] );
+ marker->nextInRead = fastPath;
+ fastPath->prevInRead = marker;
+ fastPath = marker;
+ }
+
+ marker = allocateRV ( 2, destination );
+ slowPath = marker;
+ marker = allocateRV ( 2, origin );
+ marker->nextInRead = slowPath;
+ slowPath->prevInRead = marker;
+ slowPath = marker;
+
+ for ( i = 0; i < slowLength; i++ )
+ {
+ marker = allocateRV ( 2, previous[slowPath->edgeid] );
+ marker->nextInRead = slowPath;
+ slowPath->prevInRead = marker;
+ slowPath = marker;
+ }
+
+ fastSeqLength = extractSequence ( fastPath, fastSequence );
+ slowSeqLength = extractSequence ( slowPath, slowSequence );
+
+ /*
+ if(destination==6359){
+ printf("destination %d, slowLength %d, fastLength %d\n",destination,slowLength,fastLength);
+ printf("fastSeqLength %d, slowSeqLength %d\n",fastSeqLength,slowSeqLength);
+ }
+ */
+ if ( !fastSeqLength || !slowSeqLength )
+ {
+ detachPathSingle ( slowPath );
+ detachPathSingle ( fastPath );
+ return;
+ }
+
+ cmpCounter++;
+
+ if ( !compareSequences ( fastSequence, slowSequence, fastSeqLength, slowSeqLength ) )
+ {
+ detachPathSingle ( slowPath );
+ detachPathSingle ( fastPath );
+ return;
+ }
+
+ //only merge clean bubble ...
+ if ( clean )
+ {
+ unsigned int bal_ed;
+ unsigned int arcRight_n, arcLeft_n;
+ READINTERVAL *tmp;
+ tmp = fastPath->nextInRead;
+
+ while ( tmp->nextInRead )
+ {
+ bal_ed = getTwinEdge ( tmp->edgeid );
+ arcCount ( tmp->edgeid, &arcRight_n );
+ arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcRight_n != 1 || arcLeft_n != 1 ) //not clean bubble
+ {
+ return;
+ }
+
+ tmp = tmp->nextInRead;
+ }
+
+ tmp = slowPath->nextInRead;
+
+ while ( tmp->nextInRead )
+ {
+ bal_ed = getTwinEdge ( tmp->edgeid );
+ arcCount ( tmp->edgeid, &arcRight_n );
+ arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcRight_n != 1 || arcLeft_n != 1 ) //not clean bubble
+ {
+ return;
+ }
+
+ tmp = tmp->nextInRead;
+ }
+ }
+
+ simiCounter++;
+ pinCounter += cleanUpRedundancy ();
+
+ if ( pinCounter % 100000 == 0 )
+ {
+ fprintf ( stderr, ".............%lld bubbles merged.\n", pinCounter );
+ }
+
+ HasChanged = 1;
}
/*************************************************
@@ -2197,67 +2203,67 @@ Return:
static void tourBusArc ( unsigned int origin, unsigned int destination, unsigned int arcMulti, Time originTime )
{
- Time arcTime, totalTime, destinationTime;
- unsigned int oldPrevious = previous[destination];
-
- if ( oldPrevious == origin || edge_array[destination].multi == 1 )
- {
- return;
- }
-
- arcCounter++;
-
- if ( arcMulti > 0 )
- {
- arcTime = ( ( Time ) edge_array[origin].length ) / ( ( Time ) arcMulti );
- }
- else
- {
- arcTime = 0.0;
- fprintf ( stderr, "Arc from %d to %d with flags %d originTime %f, arc %d.\n", origin, destination, edge_array[destination].multi, originTime, arcMulti );
- }
-
- totalTime = originTime + arcTime;
- /*
- if(destination==289129||destination==359610){
- printf("arc from %d to %d with flags %d time %f originTime %f, arc %d\n",
- origin,destination,edge_array[destination].multi,totalTime,originTime,arcMulti);
- fflush(stdout);
- }
- */
- destinationTime = times[destination];
-
- if ( destinationTime == -1 )
- {
- times[destination] = totalTime;
- dheapNodes[destination] = insertNodeIntoDHeap ( dheap, totalTime, destination );
- dnodeCounter++;
- previous[destination] = origin;
- return;
- }
- else if ( destinationTime > totalTime )
- {
- if ( dheapNodes[destination] == NULL )
- {
- return;
- }
-
- replaceCounter++;
- times[destination] = totalTime;
- replaceKeyInDHeap ( dheap, dheapNodes[destination], totalTime );
- previous[destination] = origin;
- comparePaths ( destination, oldPrevious );
- return;
- }
- else
- {
- if ( destinationTime == times[origin] && isPreviousToNode ( destination, origin ) )
- {
- return;
- }
-
- comparePaths ( destination, origin );
- }
+ /* Relax the arc origin -> destination: compute the arrival time at destination when coming through origin, then either enqueue destination, lower its heap key, or hand the two competing routes to comparePaths(). */
+ Time arcTime, totalTime, destinationTime;
+ unsigned int oldPrevious = previous[destination];
+
+ /* Skip when origin is already the recorded predecessor, or when the destination edge has multi == 1. */
+ if ( oldPrevious == origin || edge_array[destination].multi == 1 )
+ {
+ return;
+ }
+
+ arcCounter++;
+
+ if ( arcMulti > 0 )
+ {
+ /* Arc cost: length of the origin edge divided by the arc multiplicity. */
+ arcTime = ( ( Time ) edge_array[origin].length ) / ( ( Time ) arcMulti );
+ }
+ else
+ {
+ arcTime = 0.0;
+ /* origin, destination and arcMulti are unsigned int, so they are printed with %u rather than %d. */
+ fprintf ( stderr, "Arc from %u to %u with flags %d originTime %f, arc %u.\n", origin, destination, edge_array[destination].multi, originTime, arcMulti );
+ }
+
+ totalTime = originTime + arcTime;
+ /*
+ if(destination==289129||destination==359610){
+ printf("arc from %d to %d with flags %d time %f originTime %f, arc %d\n",
+ origin,destination,edge_array[destination].multi,totalTime,originTime,arcMulti);
+ fflush(stdout);
+ }
+ */
+ destinationTime = times[destination];
+
+ if ( destinationTime == -1 )
+ {
+ /* A time of -1 means destination has no arrival time yet: record one and push it on the heap. */
+ times[destination] = totalTime;
+ dheapNodes[destination] = insertNodeIntoDHeap ( dheap, totalTime, destination );
+ dnodeCounter++;
+ previous[destination] = origin;
+ return;
+ }
+ else if ( destinationTime > totalTime )
+ {
+ /* A faster route was found, but the key can only be lowered while destination still has a heap handle. */
+ if ( dheapNodes[destination] == NULL )
+ {
+ return;
+ }
+
+ replaceCounter++;
+ times[destination] = totalTime;
+ replaceKeyInDHeap ( dheap, dheapNodes[destination], totalTime );
+ previous[destination] = origin;
+ /* The former predecessor now marks the competing route. */
+ comparePaths ( destination, oldPrevious );
+ return;
+ }
+ else
+ {
+ /* NOTE(review): presumably this avoids comparing a path with itself when destination already lies on origin's route - confirm against isPreviousToNode(). */
+ if ( destinationTime == times[origin] && isPreviousToNode ( destination, origin ) )
+ {
+ return;
+ }
+
+ /* The recorded route is at least as fast: compare it with the route through origin. */
+ comparePaths ( destination, origin );
+ }
}
/*************************************************
@@ -2274,47 +2280,47 @@ Return:
*************************************************/
static void tourBusNode ( unsigned int node )
{
- ARC * parc;
- int index = 0, outNodeNum;
- expanded[expCounter++] = node;
- activeNode = node;
- parc = edge_array[activeNode].arcs;
-
- while ( parc )
- {
- outArcArray[index] = parc;
- outNodeArray[index++] = parc->to_ed;
-
- if ( index >= MAXCONNECTION )
- {
- break;
- }
-
- parc = parc->next;
- }
-
- outNodeNum = index;
- HasChanged = 0;
-
- for ( index = 0; index < outNodeNum; index++ )
- {
- if ( HasChanged )
- {
- parc = getArcBetween ( activeNode, outNodeArray[index] );
- getArcCounter++;
- }
- else
- {
- parc = outArcArray[index];
- }
-
- if ( !parc )
- {
- continue;
- }
-
- tourBusArc ( activeNode, outNodeArray[index], parc->multiplicity, times[activeNode] );
- }
+ ARC *parc; /* expands one node: snapshots its outgoing arcs, then relaxes each through tourBusArc() */
+ int index = 0, outNodeNum;
+ expanded[expCounter++] = node; /* log the node in the list of expanded nodes */
+ activeNode = node;
+ parc = edge_array[activeNode].arcs;
+
+ while ( parc ) /* snapshot the arc list before anything can modify it */
+ {
+ outArcArray[index] = parc;
+ outNodeArray[index++] = parc->to_ed;
+
+ if ( index >= MAXCONNECTION ) /* arcs beyond MAXCONNECTION are not recorded */
+ {
+ break;
+ }
+
+ parc = parc->next;
+ }
+
+ outNodeNum = index;
+ HasChanged = 0;
+
+ for ( index = 0; index < outNodeNum; index++ )
+ {
+ if ( HasChanged ) /* once set during this expansion, the saved arc pointers are not reused: look the arc up again */
+ {
+ parc = getArcBetween ( activeNode, outNodeArray[index] );
+ getArcCounter++;
+ }
+ else
+ {
+ parc = outArcArray[index];
+ }
+
+ if ( !parc ) /* no arc available for this target any more */
+ {
+ continue;
+ }
+
+ tourBusArc ( activeNode, outNodeArray[index], parc->multiplicity, times[activeNode] );
+ }
}
/*
@@ -2347,21 +2353,21 @@ Return:
*************************************************/
static void tourBus ( unsigned int startingPoint )
{
- unsigned int currentNode = startingPoint;
- times[startingPoint] = 0;
- previous[startingPoint] = currentNode;
-
- while ( currentNode > 0 )
- {
- dheapNodes[currentNode] = NULL;
- tourBusNode ( currentNode );
- currentNode = removeNextNodeFromDHeap ( dheap );
-
- if ( currentNode > 0 )
- {
- rnodeCounter++;
- }
- }
+ unsigned int currentNode = startingPoint; /* traversal driver: expand nodes in the order the heap hands them out */
+ times[startingPoint] = 0; /* the starting node is reached at time zero */
+ previous[startingPoint] = currentNode; /* the starting node is recorded as its own predecessor */
+
+ while ( currentNode > 0 ) /* node id 0 ends the tour; presumably that is what the heap returns when empty - TODO confirm */
+ {
+ dheapNodes[currentNode] = NULL; /* drop the heap handle of the node about to be expanded */
+ tourBusNode ( currentNode );
+ currentNode = removeNextNodeFromDHeap ( dheap );
+
+ if ( currentNode > 0 )
+ {
+ rnodeCounter++; /* statistics: nodes taken off the heap */
+ }
+ }
}
/*************************************************
@@ -2380,87 +2386,87 @@ Output:
Return:
None.
*************************************************/
-void bubblePinch ( double simiCutoff, char * outfile, int M, boolean isIter, boolean last )
+void bubblePinch ( double simiCutoff, char *outfile, int M, boolean isIter, boolean last )
{
- new_num_vt = 2 * num_vt;
- unsigned int index, counter = 0;
- unsigned int startingNode;
- char temp[256];
- sprintf ( temp, "%s.pathpair", outfile );
- caseA = caseB = caseC = caseD = caseE = 0;
- progress = 0;
- arcCounter = 0;
- dnodeCounter = 0;
- rnodeCounter = 0;
- btCounter = 0;
- cmpCounter = 0;
- simiCounter = 0;
- pinCounter = 0;
- replaceCounter = 0;
- getArcCounter = 0;
- cutoff = 1.0 - simiCutoff;
-
- if ( M <= 1 )
- {
- MAXNODELENGTH = 3;
- DIFF = 2;
- }
- else if ( M == 2 )
- {
- MAXNODELENGTH = 9;
- DIFF = 3;
- }
- else
- {
- MAXNODELENGTH = 30;
- DIFF = 10;
- }
-
- fprintf ( stderr, "Start to pinch bubbles, cutoff %f, MAX NODE NUM %d, MAX DIFF NUM %d.\n", cutoff, MAXNODELENGTH, DIFF );
- createRVmemo ();
- times = ( Time * ) ckalloc ( ( num_ed + 1 ) * sizeof ( Time ) );
- previous = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
- expanded = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
- dheapNodes = ( DFibHeapNode ** ) ckalloc ( ( num_ed + 1 ) * sizeof ( DFibHeapNode * ) );
- WORDFILTER = createFilter ( overlaplen );
-
- for ( index = 1; index <= num_ed; index++ )
- {
- times[index] = -1;
- previous[index] = 0;
- dheapNodes[index] = NULL;
- }
-
- dheap = newDFibHeap ();
- eligibleStartingPoints = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
- resetNodeStatus ();
- createArcLookupTable ();
- recordArcsInLookupTable ();
-
- while ( ( startingNode = nextStartingPoint () ) > 0 )
- {
- counter++;
- expCounter = 0;
- tourBus ( startingNode );
- updateNodeStatus ();
- }
-
- resetNodeStatus ();
- deleteArcLookupTable ();
- destroyReadIntervMem ();
- fprintf ( stderr, "%d start points, %lld dheap nodes.\n", counter, dnodeCounter );
- fprintf ( stderr, "%lld pair(s) found, %lld pair of path(s) compared, %lld pair(s) merged.\n", btCounter, cmpCounter, pinCounter );
- fprintf ( stderr, "Sequence comparison failed:\n" );
- fprintf ( stderr, " Path crossing deleted edge %lld\n", caseA );
- fprintf ( stderr, " Length difference of two paths greater than two %lld\n", caseB );
- fprintf ( stderr, " Mismatch score greater than cutoff (%d) %lld\n", DIFF, caseC );
- fprintf ( stderr, " Mismatch score ratio greater than cutoff (%.1f) %lld\n", cutoff, caseD );
- fprintf ( stderr, " Path length shorter than (Kmer-1) %lld\n", caseE );
- free ( ( void * ) eligibleStartingPoints );
- destroyDHeap ( dheap );
- free ( ( void * ) dheapNodes );
- free ( ( void * ) times );
- free ( ( void * ) previous );
- free ( ( void * ) expanded );
- linearConcatenate ( isIter, last );
+ /* Entry point of bubble pinching: reset the counters, pick the thresholds from M, allocate the per-edge work arrays, run tourBus() from every starting point, report statistics, release everything and finish with linearConcatenate(). */
+ new_num_vt = 2 * num_vt;
+ unsigned int index, counter = 0;
+ unsigned int startingNode;
+ /* outfile used to be formatted with sprintf into a 256-byte stack buffer that nothing read; the dead buffer is gone, so a long path can no longer overflow it. The parameter stays for interface compatibility. */
+ ( void ) outfile;
+ caseA = caseB = caseC = caseD = caseE = 0;
+ progress = 0;
+ arcCounter = 0;
+ dnodeCounter = 0;
+ rnodeCounter = 0;
+ btCounter = 0;
+ cmpCounter = 0;
+ simiCounter = 0;
+ pinCounter = 0;
+ replaceCounter = 0;
+ getArcCounter = 0;
+ cutoff = 1.0 - simiCutoff;
+
+ /* M selects how aggressive the search is: larger M raises both limits. */
+ if ( M <= 1 )
+ {
+ MAXNODELENGTH = 3;
+ DIFF = 2;
+ }
+ else if ( M == 2 )
+ {
+ MAXNODELENGTH = 9;
+ DIFF = 3;
+ }
+ else
+ {
+ MAXNODELENGTH = 30;
+ DIFF = 10;
+ }
+
+ fprintf ( stderr, "Start to pinch bubbles, cutoff %f, MAX NODE NUM %d, MAX DIFF NUM %d.\n", cutoff, MAXNODELENGTH, DIFF );
+ createRVmemo ();
+ /* Work arrays are indexed by edge id, 1..num_ed. */
+ times = ( Time * ) ckalloc ( ( num_ed + 1 ) * sizeof ( Time ) );
+ previous = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
+ expanded = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
+ dheapNodes = ( DFibHeapNode ** ) ckalloc ( ( num_ed + 1 ) * sizeof ( DFibHeapNode * ) );
+ WORDFILTER = createFilter ( overlaplen );
+
+ for ( index = 1; index <= num_ed; index++ )
+ {
+ times[index] = -1; /* -1 marks an edge with no arrival time yet */
+ previous[index] = 0;
+ dheapNodes[index] = NULL;
+ }
+
+ dheap = newDFibHeap ();
+ eligibleStartingPoints = ( unsigned int * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned int ) );
+ resetNodeStatus ();
+ createArcLookupTable ();
+ recordArcsInLookupTable ();
+
+ /* nextStartingPoint() returning 0 means no starting point is left. */
+ while ( ( startingNode = nextStartingPoint () ) > 0 )
+ {
+ counter++;
+ expCounter = 0;
+ tourBus ( startingNode );
+ updateNodeStatus ();
+ }
+
+ resetNodeStatus ();
+ deleteArcLookupTable ();
+ destroyReadIntervMem ();
+ /* counter is unsigned, hence %u. */
+ fprintf ( stderr, "%u start points, %lld dheap nodes.\n", counter, dnodeCounter );
+ fprintf ( stderr, "%lld pair(s) found, %lld pair of path(s) compared, %lld pair(s) merged.\n", btCounter, cmpCounter, pinCounter );
+ fprintf ( stderr, "Sequence comparison failed:\n" );
+ fprintf ( stderr, " Path crossing deleted edge %lld\n", caseA );
+ fprintf ( stderr, " Length difference of two paths greater than two %lld\n", caseB );
+ fprintf ( stderr, " Mismatch score greater than cutoff (%d) %lld\n", DIFF, caseC );
+ fprintf ( stderr, " Mismatch score ratio greater than cutoff (%.1f) %lld\n", cutoff, caseD );
+ fprintf ( stderr, " Path length shorter than (Kmer-1) %lld\n", caseE );
+ free ( ( void * ) eligibleStartingPoints );
+ destroyDHeap ( dheap );
+ free ( ( void * ) dheapNodes );
+ free ( ( void * ) times );
+ free ( ( void * ) previous );
+ free ( ( void * ) expanded );
+ linearConcatenate ( isIter, last );
}
diff --git a/standardPregraph/check.c b/standardPregraph/check.c
index be06fba..c8f0b92 100644
--- a/standardPregraph/check.c
+++ b/standardPregraph/check.c
@@ -1,7 +1,7 @@
/*
* check.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -21,112 +21,112 @@
*/
#include <stdinc.h>
-void * ckalloc ( unsigned long long amount );
-FILE * ckopen ( char * name, char * mode );
-FILE * ckopen ( char * name, char * mode )
+void *ckalloc ( unsigned long long amount );
+FILE *ckopen ( char *name, char *mode );
+FILE *ckopen ( char *name, char *mode ) /* fopen wrapper that never returns NULL: on failure it reports the name and exits */
{
- FILE * fp;
+ FILE *fp;
- if ( ( fp = fopen ( name, mode ) ) == NULL )
- {
- fprintf ( stderr, "Cannot open %s. Now exit to system...\n", name );
- exit ( -1 );
- }
+ if ( ( fp = fopen ( name, mode ) ) == NULL )
+ {
+ fprintf ( stderr, "Cannot open %s. Now exit to system...\n", name );
+ exit ( -1 ); /* the whole program terminates when the file cannot be opened */
+ }
- return ( fp );
+ return ( fp ); /* open stream; closing it is up to the caller */
}
-static int GetFileSize ( FILE * fp )
+/* Report whether at least one byte can be read from fp. Despite the name no size is measured: the result is 0 when the first read yields EOF and 1 otherwise. One byte of the stream is consumed. */
+static int GetFileSize ( FILE *fp )
{
- char c = fgetc ( fp );
+ /* fgetc() returns an int so that EOF stays distinct from every byte value. Stored in a char, the test below either never fires (unsigned char) or mistakes a leading 0xFF byte for end-of-file (signed char). */
+ int c = fgetc ( fp );
- if ( c == EOF )
- {
- return 0;
- }
+ if ( c == EOF )
+ {
+ return 0;
+ }
- return 1;
+ return 1;
}
-boolean check_file ( char * name )
+boolean check_file ( char *name )
{
- FILE * linkF;
-
- if ( strlen ( name ) > 3 && strcmp ( name + strlen ( name ) - 3, ".gz" ) == 0 )
- {
- char cmd[1000];
- sprintf ( cmd, "gzip -dc %s", name );
- linkF = popen ( cmd, "r" );
-
- if ( linkF )
- {
- if ( GetFileSize ( linkF ) != 0 )
- {
- pclose ( linkF );
- return 1;
- }
-
- pclose ( linkF );
- }
-
- return 0;
- }
- else
- {
- linkF = fopen ( name, "r" );
-
- if ( linkF )
- {
- if ( GetFileSize ( linkF ) != 0 )
- {
- fclose ( linkF );
- return 1;
- }
-
- fclose ( linkF );
- }
-
- return 0;
- }
+ /* Return 1 when the named input yields at least one byte, 0 otherwise. Names ending in ".gz" are read through "gzip -dc"; everything else is opened directly. */
+ FILE *linkF;
+
+ if ( strlen ( name ) > 3 && strcmp ( name + strlen ( name ) - 3, ".gz" ) == 0 )
+ {
+ char cmd[1000];
+ /* Bounded formatting: an over-long name used to overflow cmd through sprintf. */
+ int written = snprintf ( cmd, sizeof ( cmd ), "gzip -dc %s", name );
+
+ if ( written < 0 || ( size_t ) written >= sizeof ( cmd ) )
+ {
+ fprintf ( stderr, "File name too long to check: %s\n", name );
+ return 0;
+ }
+
+ /* NOTE(review): name is pasted into a shell command unquoted; a name containing shell metacharacters would be interpreted by the shell. Only pass trusted paths. */
+ linkF = popen ( cmd, "r" );
+
+ if ( linkF )
+ {
+ if ( GetFileSize ( linkF ) != 0 )
+ {
+ pclose ( linkF );
+ return 1;
+ }
+
+ pclose ( linkF );
+ }
+
+ return 0;
+ }
+ else
+ {
+ linkF = fopen ( name, "r" );
+
+ if ( linkF )
+ {
+ if ( GetFileSize ( linkF ) != 0 )
+ {
+ fclose ( linkF );
+ return 1;
+ }
+
+ fclose ( linkF );
+ }
+
+ return 0;
+ }
}
/* ckalloc - allocate space; check for success */
-void * ckalloc ( unsigned long long amount )
+void *ckalloc ( unsigned long long amount )
{
- void * p;
-
- if ( ( p = ( void * ) calloc ( 1, ( unsigned long long ) amount ) ) == NULL && amount != 0 )
- {
- fprintf ( stderr, "Ran out of memory while applying %lldbytes\n", amount );
- fprintf ( stderr, "There may be errors as follows:\n" );
- fprintf ( stderr, "1) Not enough memory.\n" );
- fprintf ( stderr, "2) The ARRAY may be overrode.\n" );
- fprintf ( stderr, "3) The wild pointers.\n" );
- exit ( -1 );
- }
-
- return ( p );
+ /* Allocate amount zero-filled bytes. Never returns NULL for a non-zero request: on failure a diagnostic is printed and the program exits. */
+ void *p = NULL;
+
+ /* calloc() takes a size_t. Where size_t is narrower than unsigned long long, a huge request must fail here instead of being silently truncated to a small allocation. */
+ if ( amount == ( size_t ) amount )
+ {
+ p = calloc ( 1, ( size_t ) amount );
+ }
+
+ if ( p == NULL && amount != 0 )
+ {
+ /* amount is unsigned, hence %llu. */
+ fprintf ( stderr, "Ran out of memory while applying %llubytes\n", amount );
+ fprintf ( stderr, "There may be errors as follows:\n" );
+ fprintf ( stderr, "1) Not enough memory.\n" );
+ fprintf ( stderr, "2) The ARRAY may be overrode.\n" );
+ fprintf ( stderr, "3) The wild pointers.\n" );
+ exit ( -1 );
+ }
+
+ return ( p );
}
/* reallocate memory */
-void * ckrealloc ( void * p, size_t new_size, size_t old_size )
+void *ckrealloc ( void *p, size_t new_size, size_t old_size )
{
- void * q;
- q = realloc ( ( void * ) p, new_size );
-
- if ( new_size == 0 || q != ( void * ) 0 )
- {
- return q;
- }
-
- /* manually reallocate space */
- q = ckalloc ( new_size );
- /* move old memory to new space */
- bcopy ( p, q, old_size );
- free ( p );
- return q;
+ /* Resize p to new_size bytes. If realloc() fails, fall back to a fresh ckalloc() block (which exits on failure), copy the old contents over and free p. old_size is the number of valid bytes currently held in p. */
+ void *q;
+ size_t keep;
+ q = realloc ( ( void * ) p, new_size );
+
+ if ( new_size == 0 || q != ( void * ) 0 )
+ {
+ return q;
+ }
+
+ /* manually reallocate space */
+ q = ckalloc ( new_size );
+ /* move old memory to new space; never copy more than the new block can hold, and never read from a NULL source */
+ keep = old_size < new_size ? old_size : new_size;
+
+ if ( p != NULL && keep > 0 )
+ {
+ bcopy ( p, q, keep );
+ }
+
+ free ( p );
+ return q;
}
diff --git a/standardPregraph/compactEdge.c b/standardPregraph/compactEdge.c
index edf9824..6b5836e 100644
--- a/standardPregraph/compactEdge.c
+++ b/standardPregraph/compactEdge.c
@@ -1,7 +1,7 @@
/*
* compactEdge.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -28,70 +28,70 @@
void copyEdge ( unsigned int source, unsigned int target )
{
- edge_array[target].from_vt = edge_array[source].from_vt;
- edge_array[target].to_vt = edge_array[source].to_vt;
- edge_array[target].length = edge_array[source].length;
- edge_array[target].cvg = edge_array[source].cvg;
- edge_array[target].multi = edge_array[source].multi;
- edge_array[target].flag = edge_array[source].flag;
-
- if ( edge_array[target].seq )
- {
- free ( ( void * ) edge_array[target].seq );
- }
-
- edge_array[target].seq = edge_array[source].seq;
- edge_array[source].seq = NULL;
- edge_array[target].arcs = edge_array[source].arcs;
- edge_array[source].arcs = NULL;
- edge_array[target].markers = edge_array[source].markers;
- edge_array[source].markers = NULL;
- edge_array[target].deleted = edge_array[source].deleted;
+ /* Move the contents of edge slot source into slot target. Scalar fields are copied; seq, arcs and markers change owner, so the source slot is left with NULL pointers. Any sequence previously held by target is freed. */
+ if ( source == target )
+ {
+ /* Moving a slot onto itself must be a no-op; without this guard the sequence would be freed and the arcs and markers dropped. */
+ return;
+ }
+
+ edge_array[target].from_vt = edge_array[source].from_vt;
+ edge_array[target].to_vt = edge_array[source].to_vt;
+ edge_array[target].length = edge_array[source].length;
+ edge_array[target].cvg = edge_array[source].cvg;
+ edge_array[target].multi = edge_array[source].multi;
+ edge_array[target].flag = edge_array[source].flag;
+
+ if ( edge_array[target].seq )
+ {
+ free ( ( void * ) edge_array[target].seq );
+ }
+
+ /* Ownership transfer: the pointers move, the source slot forgets them. */
+ edge_array[target].seq = edge_array[source].seq;
+ edge_array[source].seq = NULL;
+ edge_array[target].arcs = edge_array[source].arcs;
+ edge_array[source].arcs = NULL;
+ edge_array[target].markers = edge_array[source].markers;
+ edge_array[source].markers = NULL;
+ edge_array[target].deleted = edge_array[source].deleted;
}
//move edge from source to target
void edgeMove ( unsigned int source, unsigned int target )
{
- unsigned int bal_source, bal_target;
- ARC * arc;
- copyEdge ( source, target );
- bal_source = getTwinEdge ( source );
-
- //bal_edge
- if ( bal_source != source )
- {
- bal_target = target + 1;
- copyEdge ( bal_source, bal_target );
- edge_array[target].bal_edge = 2;
- edge_array[bal_target].bal_edge = 0;
- }
- else
- {
- edge_array[target].bal_edge = 1;
- bal_target = target;
- }
-
- //take care of the arcs
- arc = edge_array[target].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = bal_target;
- arc = arc->next;
- }
-
- if ( bal_target == target )
- {
- return;
- }
-
- arc = edge_array[bal_target].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = target;
- arc = arc->next;
- }
+ unsigned int bal_source, bal_target;
+ ARC *arc;
+ copyEdge ( source, target );
+ bal_source = getTwinEdge ( source ); /* twin of the edge being moved; equals source when the edge is its own twin */
+
+ //bal_edge: the twin, when distinct, is moved into the slot right after target
+ if ( bal_source != source )
+ {
+ bal_target = target + 1;
+ copyEdge ( bal_source, bal_target );
+ edge_array[target].bal_edge = 2; /* NOTE(review): bal_edge looks like an offset encoding relative to the twin's slot - confirm against getTwinEdge() */
+ edge_array[bal_target].bal_edge = 0;
+ }
+ else
+ {
+ edge_array[target].bal_edge = 1; /* value used for an edge that is its own twin */
+ bal_target = target;
+ }
+
+ //take care of the arcs: the mirror of every arc leaving target must now point at the relocated twin
+ arc = edge_array[target].arcs;
+
+ while ( arc )
+ {
+ arc->bal_arc->to_ed = bal_target;
+ arc = arc->next;
+ }
+
+ if ( bal_target == target ) /* self-twin edge: the loop above already covered everything */
+ {
+ return;
+ }
+
+ arc = edge_array[bal_target].arcs;
+
+ while ( arc ) /* symmetric fix-up for the arcs leaving the relocated twin */
+ {
+ arc->bal_arc->to_ed = target;
+ arc = arc->next;
+ }
}
/*************************************************
@@ -108,35 +108,35 @@ Return:
*************************************************/
void compactEdgeArray ()
{
- unsigned int i;
- unsigned int validCounter = 0;
- unsigned int bal_ed;
- fprintf ( stderr, "Before compacting, %d edge(s) existed.\n", num_ed );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted )
- {
- continue;
- }
-
- validCounter++;
-
- if ( i == validCounter )
- {
- continue;
- }
-
- bal_ed = getTwinEdge ( i );
- edgeMove ( i, validCounter );
-
- if ( bal_ed != i )
- {
- i++;
- validCounter++;
- }
- }
-
- num_ed = validCounter;
- fprintf ( stderr, "After compacting, %d edge(s) left.\n", num_ed );
+ unsigned int i; /* read cursor over the existing edge slots */
+ unsigned int validCounter = 0; /* write cursor: number of surviving edges placed so far */
+ unsigned int bal_ed;
+ fprintf ( stderr, "Before compacting, %d edge(s) existed.\n", num_ed );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted ) /* deleted edges leave a hole that later edges slide into */
+ {
+ continue;
+ }
+
+ validCounter++;
+
+ if ( i == validCounter ) /* already in its final slot, nothing to move */
+ {
+ continue;
+ }
+
+ bal_ed = getTwinEdge ( i ); /* read the twin before edgeMove() rewrites the slots */
+ edgeMove ( i, validCounter );
+
+ if ( bal_ed != i ) /* edgeMove() relocated the twin as well, so both cursors skip it */
+ {
+ i++;
+ validCounter++;
+ }
+ }
+
+ num_ed = validCounter; /* the edge count shrinks to the number of survivors */
+ fprintf ( stderr, "After compacting, %d edge(s) left.\n", num_ed );
}
diff --git a/standardPregraph/concatenateEdge.c b/standardPregraph/concatenateEdge.c
index c795e46..1ec5a11 100644
--- a/standardPregraph/concatenateEdge.c
+++ b/standardPregraph/concatenateEdge.c
@@ -1,7 +1,7 @@
/*
* concatenateEdge.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,17 +26,17 @@
#include "extfunc.h"
#include "extvab.h"
-void copySeq ( char * targetS, char * sourceS, int pos, int length )
+void copySeq ( char *targetS, char *sourceS, int pos, int length )
{
- char ch;
- int i, index;
- index = pos;
-
- for ( i = 0; i < length; i++ )
- {
- ch = getCharInTightString ( sourceS, i );
- writeChar2tightString ( ch, targetS, index++ );
- }
+ char ch; /* copies length symbols from the start of sourceS into targetS beginning at position pos */
+ int i, index;
+ index = pos; /* write position in the target */
+
+ for ( i = 0; i < length; i++ )
+ {
+ ch = getCharInTightString ( sourceS, i ); /* the source is always read from its first symbol */
+ writeChar2tightString ( ch, targetS, index++ );
+ }
}
/*************************************************
@@ -55,189 +55,197 @@ Return:
*************************************************/
void linearUpdateConnection ( unsigned int e1, unsigned int e2, int indicate )
{
- unsigned int bal_ed;
- ARC * parc;
-
- if ( !indicate )
- {
- edge_array[e1].to_vt = edge_array[e2].to_vt;
- bal_ed = getTwinEdge ( e1 );
- parc = edge_array[e2].arcs;
-
- while ( parc )
- {
- parc->bal_arc->to_ed = bal_ed;
- parc = parc->next;
- }
-
- edge_array[e1].arcs = edge_array[e2].arcs;
- edge_array[e2].arcs = NULL;
-
- if ( edge_array[e1].length || edge_array[e2].length )
- { edge_array[e1].cvg = ( edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length ) / ( edge_array[e1].length + edge_array[e2].length ); }
-
- edge_array[e2].deleted = 1;
- }
- else
- {
- //all the arcs pointing to e1 switch to e2
- parc = edge_array[getTwinEdge ( e1 )].arcs;
-
- while ( parc )
- {
- parc->bal_arc->to_ed = e2;
- parc = parc->next;
- }
-
- edge_array[e1].arcs = NULL;
- edge_array[e2].from_vt = edge_array[e1].from_vt;
-
- if ( edge_array[e1].length || edge_array[e2].length )
- { edge_array[e2].cvg = ( edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length ) / ( edge_array[e1].length + edge_array[e2].length ); }
-
- edge_array[e1].deleted = 1;
- }
+ unsigned int bal_ed;
+ ARC *parc;
+
+ if ( !indicate ) /* indicate == 0: e1 survives and takes over e2's far end */
+ {
+ edge_array[e1].to_vt = edge_array[e2].to_vt;
+ bal_ed = getTwinEdge ( e1 );
+ parc = edge_array[e2].arcs;
+
+ while ( parc ) /* the mirror of each arc leaving e2 is pointed at e1's twin */
+ {
+ parc->bal_arc->to_ed = bal_ed;
+ parc = parc->next;
+ }
+
+ edge_array[e1].arcs = edge_array[e2].arcs; /* e1 inherits e2's outgoing arcs */
+ edge_array[e2].arcs = NULL;
+
+ if ( edge_array[e1].length || edge_array[e2].length ) /* guard written to keep the divisor below non-zero */
+ {
+ edge_array[e1].cvg = ( edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length ) / ( edge_array[e1].length + edge_array[e2].length ); /* length-weighted mean coverage */
+ }
+
+ edge_array[e2].deleted = 1;
+ }
+ else /* indicate != 0: e2 survives and takes over e1's near end */
+ {
+ //all the arcs pointing to e1 switch to e2
+ parc = edge_array[getTwinEdge ( e1 )].arcs;
+
+ while ( parc )
+ {
+ parc->bal_arc->to_ed = e2;
+ parc = parc->next;
+ }
+
+ edge_array[e1].arcs = NULL;
+ edge_array[e2].from_vt = edge_array[e1].from_vt;
+
+ if ( edge_array[e1].length || edge_array[e2].length ) /* guard written to keep the divisor below non-zero */
+ {
+ edge_array[e2].cvg = ( edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length ) / ( edge_array[e1].length + edge_array[e2].length ); /* length-weighted mean coverage */
+ }
+
+ edge_array[e1].deleted = 1;
+ }
}
-static void printEdgeSeq ( FILE * fp, char * tightSeq, int len )
+static void printEdgeSeq ( FILE *fp, char *tightSeq, int len ) /* writes len symbols of tightSeq to fp as characters, wrapped into lines */
{
- int i;
+ int i;
- for ( i = 0; i < len; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( tightSeq, i ) ) );
+ for ( i = 0; i < len; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( tightSeq, i ) ) ); /* symbol code mapped to its printable character */
- if ( ( i + overlaplen + 1 ) % 100 == 0 && i < len - 1 )
- {
- fprintf ( fp, "\n" );
- }
- }
+ if ( ( i + overlaplen + 1 ) % 100 == 0 && i < len - 1 ) /* line break positions are offset by overlaplen; no break right after the last symbol */
+ {
+ fprintf ( fp, "\n" );
+ }
+ }
- fprintf ( fp, "\n" );
+ fprintf ( fp, "\n" ); /* the output always ends with a newline */
}
void allpathUpdateEdge ( unsigned int e1, unsigned int e2, int indicate, boolean last )
{
- int tightLen;
- char * tightSeq = NULL;
-
- if ( edge_array[e1].cvg == 0 )
- {
- edge_array[e1].cvg = edge_array[e2].cvg;
- }
-
- if ( edge_array[e2].cvg == 0 )
- {
- edge_array[e2].cvg = edge_array[e1].cvg;
- }
-
- /*
- if(edge_array[e1].length&&edge_array[e2].length){
- fprintf(stderr,">e1\n");
- printEdgeSeq(stderr,edge_array[e1].seq,edge_array[e1].length);
- fprintf(stderr,">e2\n");
- printEdgeSeq(stderr,edge_array[e2].seq,edge_array[e2].length);
- } */
- unsigned int cvgsum = edge_array[e1].cvg * edge_array[e1].length + edge_array[e2].cvg * edge_array[e2].length;
- tightLen = edge_array[e1].length + edge_array[e2].length;
-
- if ( tightLen )
- {
- tightSeq = ( char * ) ckalloc ( ( tightLen / 4 + 1 ) * sizeof ( char ) );
- }
-
- tightLen = 0;
-
- if ( edge_array[e1].length )
- {
- copySeq ( tightSeq, edge_array[e1].seq, 0, edge_array[e1].length );
- tightLen = edge_array[e1].length;
-
- if ( edge_array[e1].seq )
- {
- free ( ( void * ) edge_array[e1].seq );
- edge_array[e1].seq = NULL;
- }
- else
- {
- fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e1, edge_array[e1].length );
- }
- }
-
- if ( edge_array[e2].length )
- {
- copySeq ( tightSeq, edge_array[e2].seq, tightLen, edge_array[e2].length );
- tightLen += edge_array[e2].length;
-
- if ( edge_array[e2].seq )
- {
- free ( ( void * ) edge_array[e2].seq );
- edge_array[e2].seq = NULL;
- }
- else
- {
- fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e2, edge_array[e2].length );
- }
- }
-
- /*
- if(edge_array[e1].length&&edge_array[e2].length){
- fprintf(stderr,">e1+e2\n");
- printEdgeSeq(stderr,tightSeq,tightLen);
- }
- */
- //edge_array[e2].extend_len = tightLen-edge_array[e2].length;
- //the sequence of e1 is to be updated
- if ( !indicate )
- {
- edge_array[e2].length = 0; //e1 is removed from the graph
- edge_array[e1].to_vt = edge_array[e2].to_vt; //e2 is part of e1 now
- edge_array[e1].length = tightLen;
- edge_array[e1].seq = tightSeq;
-
- if ( tightLen )
- {
- edge_array[e1].cvg = cvgsum / tightLen;
- }
-
- if ( last )
- { edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1; }
-
- // edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1;
- }
- else
- {
- edge_array[e1].length = 0; //e1 is removed from the graph
- edge_array[e2].from_vt = edge_array[e1].from_vt; //e1 is part of e2 now
- edge_array[e2].length = tightLen;
- edge_array[e2].seq = tightSeq;
-
- if ( tightLen )
- {
- edge_array[e2].cvg = cvgsum / tightLen;
- }
-
- if ( last )
- { edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1; }
-
- // edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1;
- }
+ /* Join the packed sequences of e1 and e2 (e1 first) into a fresh buffer and store it, together with the combined length and the length-weighted coverage, on the surviving edge: e1 when indicate == 0, e2 otherwise. The other edge is left with length 0. When last is set, the surviving coverage is raised to at least 1. */
+ int tightLen;
+ char *tightSeq = NULL;
+
+ /* An edge without coverage borrows the coverage of its partner. */
+ if ( edge_array[e1].cvg == 0 )
+ {
+ edge_array[e1].cvg = edge_array[e2].cvg;
+ }
+
+ if ( edge_array[e2].cvg == 0 )
+ {
+ edge_array[e2].cvg = edge_array[e1].cvg;
+ }
+
+ /*
+ if(edge_array[e1].length&&edge_array[e2].length){
+ fprintf(stderr,">e1\n");
+ printEdgeSeq(stderr,edge_array[e1].seq,edge_array[e1].length);
+ fprintf(stderr,">e2\n");
+ printEdgeSeq(stderr,edge_array[e2].seq,edge_array[e2].length);
+ } */
+ /* Accumulate in 64 bits: coverage times length can exceed 32 bits on long edges, which used to wrap and corrupt the averaged coverage. */
+ unsigned long long cvgsum = ( unsigned long long ) edge_array[e1].cvg * edge_array[e1].length + ( unsigned long long ) edge_array[e2].cvg * edge_array[e2].length;
+ tightLen = edge_array[e1].length + edge_array[e2].length;
+
+ if ( tightLen )
+ {
+ /* Buffer sized as one byte per four symbols, plus one. */
+ tightSeq = ( char * ) ckalloc ( ( tightLen / 4 + 1 ) * sizeof ( char ) );
+ }
+
+ tightLen = 0;
+
+ if ( edge_array[e1].length )
+ {
+ if ( edge_array[e1].seq )
+ {
+ copySeq ( tightSeq, edge_array[e1].seq, 0, edge_array[e1].length );
+ free ( ( void * ) edge_array[e1].seq );
+ edge_array[e1].seq = NULL;
+ }
+ else
+ {
+ /* Inconsistent edge: report it instead of reading through a NULL sequence. Its span in tightSeq keeps the zero fill from ckalloc. */
+ fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e1, edge_array[e1].length );
+ }
+
+ tightLen = edge_array[e1].length;
+ }
+
+ if ( edge_array[e2].length )
+ {
+ if ( edge_array[e2].seq )
+ {
+ copySeq ( tightSeq, edge_array[e2].seq, tightLen, edge_array[e2].length );
+ free ( ( void * ) edge_array[e2].seq );
+ edge_array[e2].seq = NULL;
+ }
+ else
+ {
+ fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e2, edge_array[e2].length );
+ }
+
+ tightLen += edge_array[e2].length;
+ }
+
+ /*
+ if(edge_array[e1].length&&edge_array[e2].length){
+ fprintf(stderr,">e1+e2\n");
+ printEdgeSeq(stderr,tightSeq,tightLen);
+ }
+ */
+ //edge_array[e2].extend_len = tightLen-edge_array[e2].length;
+ //the sequence of e1 is to be updated
+ if ( !indicate )
+ {
+ edge_array[e2].length = 0; //e2 is removed from the graph
+ edge_array[e1].to_vt = edge_array[e2].to_vt; //e2 is part of e1 now
+ edge_array[e1].length = tightLen;
+ edge_array[e1].seq = tightSeq;
+
+ if ( tightLen )
+ {
+ edge_array[e1].cvg = cvgsum / tightLen;
+ }
+
+ if ( last )
+ {
+ edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1;
+ }
+
+ // edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1;
+ }
+ else
+ {
+ edge_array[e1].length = 0; //e1 is removed from the graph
+ edge_array[e2].from_vt = edge_array[e1].from_vt; //e1 is part of e2 now
+ edge_array[e2].length = tightLen;
+ edge_array[e2].seq = tightSeq;
+
+ if ( tightLen )
+ {
+ edge_array[e2].cvg = cvgsum / tightLen;
+ }
+
+ if ( last )
+ {
+ edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1;
+ }
+
+ // edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1;
+ }
}
static void debugging ( unsigned int i )
{
- ARC * parc;
- parc = edge_array[i].arcs;
-
- if ( !parc )
- {
- fprintf ( stderr, "No downward connection for %d.\n", i );
- }
-
- while ( parc )
- {
- fprintf ( stderr, "%d -> %d\n", i, parc->to_ed );
- parc = parc->next;
- }
+ ARC *parc; /* debug helper: lists on stderr every arc leaving edge i */
+ parc = edge_array[i].arcs;
+
+ if ( !parc ) /* an empty arc list is reported explicitly */
+ {
+ fprintf ( stderr, "No downward connection for %d.\n", i );
+ }
+
+ while ( parc )
+ {
+ fprintf ( stderr, "%d -> %d\n", i, parc->to_ed ); /* one line per outgoing arc */
+ parc = parc->next;
+ }
}
/*************************************************
@@ -255,81 +263,83 @@ Return:
*************************************************/
void linearConcatenate ( boolean isIter, boolean last )
{
- unsigned int i;
- int conc_c = 1;
- int counter;
- unsigned int from_ed, to_ed, bal_ed;
- ARC * parc, *parc2;
- unsigned int bal_fe;
- int donot1 = 0;
- int round = 1;
-
- //debugging(30514);
- while ( conc_c )
- {
- conc_c = 0;
- counter = 0;
- donot1 = 0;
-
- for ( i = 1; i <= num_ed; i++ ) //num_ed
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- {
- continue;
- }
-
- if ( edge_array[i].length > 0 )
- {
- counter++;
- }
-
- parc = edge_array[i].arcs;
-
- if ( !parc || parc->next )
- {
- continue;
- }
-
- to_ed = parc->to_ed;
- bal_ed = getTwinEdge ( to_ed );
- parc2 = edge_array[bal_ed].arcs;
-
- if ( bal_ed == to_ed || !parc2 || parc2->next )
- {
- continue;
- }
-
- from_ed = i;
-
- if ( from_ed == to_ed || from_ed == bal_ed )
- {
- continue;
- }
-
- if ( parc->multiplicity <= arcfilter )
- {
- donot1++;
- continue;
- }
-
- //linear connection found
- conc_c++;
- linearUpdateConnection ( from_ed, to_ed, 0 );
- allpathUpdateEdge ( from_ed, to_ed, 0, last );
- bal_fe = getTwinEdge ( from_ed );
- linearUpdateConnection ( bal_ed, bal_fe, 1 );
- allpathUpdateEdge ( bal_ed, bal_fe, 1, last );
- /*
- if(from_ed==6589||to_ed==6589)
- printf("%d <- %d (%d)\n",from_ed,to_ed,i);
- if(bal_fe==6589||bal_ed==6589)
- printf("%d <- %d (%d)\n",bal_fe,bal_ed,i);
- */
- }
-
- fprintf ( stderr, "%d edge(s) concatenated in cycle %d.\n", conc_c, round++ );
-
- if ( arcfilter )
- { fprintf ( stderr, "%d edge(s) with weight %d were not linearized.\n", donot1, arcfilter ); }
- }
+ unsigned int i;
+ int conc_c = 1; /* merges done in the current pass; non-zero keeps the outer loop running */
+ int counter; /* NOTE(review): incremented below but never read in this function */
+ unsigned int from_ed, to_ed, bal_ed;
+ ARC *parc, *parc2;
+ unsigned int bal_fe;
+ int donot1 = 0; /* candidates skipped in the current pass because of the arcfilter threshold */
+ int round = 1;
+
+ //debugging(30514);
+ while ( conc_c ) /* repeat full passes until a pass merges nothing */
+ {
+ conc_c = 0;
+ counter = 0;
+ donot1 = 0;
+
+ for ( i = 1; i <= num_ed; i++ ) //num_ed
+ {
+ if ( edge_array[i].deleted || EdSameAsTwin ( i ) ) /* skip deleted edges and edges that are their own twin */
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length > 0 )
+ {
+ counter++;
+ }
+
+ parc = edge_array[i].arcs;
+
+ if ( !parc || parc->next ) /* the edge needs exactly one outgoing arc */
+ {
+ continue;
+ }
+
+ to_ed = parc->to_ed;
+ bal_ed = getTwinEdge ( to_ed );
+ parc2 = edge_array[bal_ed].arcs;
+
+ if ( bal_ed == to_ed || !parc2 || parc2->next ) /* the successor must not be its own twin, and its twin needs exactly one outgoing arc */
+ {
+ continue;
+ }
+
+ from_ed = i;
+
+ if ( from_ed == to_ed || from_ed == bal_ed ) /* never join an edge with itself or with its own twin */
+ {
+ continue;
+ }
+
+ if ( parc->multiplicity <= arcfilter ) /* arcs at or below the filter weight are left alone and counted */
+ {
+ donot1++;
+ continue;
+ }
+
+ //linear connection found: merge to_ed into from_ed, then mirror the merge on the twin edges
+ conc_c++;
+ linearUpdateConnection ( from_ed, to_ed, 0 );
+ allpathUpdateEdge ( from_ed, to_ed, 0, last );
+ bal_fe = getTwinEdge ( from_ed );
+ linearUpdateConnection ( bal_ed, bal_fe, 1 );
+ allpathUpdateEdge ( bal_ed, bal_fe, 1, last );
+ /*
+ if(from_ed==6589||to_ed==6589)
+ printf("%d <- %d (%d)\n",from_ed,to_ed,i);
+ if(bal_fe==6589||bal_ed==6589)
+ printf("%d <- %d (%d)\n",bal_fe,bal_ed,i);
+ */
+ }
+
+ fprintf ( stderr, "%d edge(s) concatenated in cycle %d.\n", conc_c, round++ );
+
+ if ( arcfilter ) /* the skip count is only reported when a filter is active */
+ {
+ fprintf ( stderr, "%d edge(s) with weight %d were not linearized.\n", donot1, arcfilter );
+ }
+ }
}
diff --git a/standardPregraph/connect.c b/standardPregraph/connect.c
index 1f10a8c..6b7e4fc 100644
--- a/standardPregraph/connect.c
+++ b/standardPregraph/connect.c
@@ -1,7 +1,7 @@
/*
* connect.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -30,171 +30,171 @@
void createCntMemManager ()
{
- if ( !cn_mem_manager )
- {
- cn_mem_manager = createMem_manager ( CNBLOCKSIZE, sizeof ( CONNECT ) );
- }
- else
- {
- fprintf ( stderr, "The cn_mem_manger created.\n" );
- }
+ if ( !cn_mem_manager )
+ {
+ cn_mem_manager = createMem_manager ( CNBLOCKSIZE, sizeof ( CONNECT ) );
+ }
+ else
+ {
+ fprintf ( stderr, "The cn_mem_manger created.\n" );
+ }
}
void destroyConnectMem ()
{
- freeMem_manager ( cn_mem_manager );
- cn_mem_manager = NULL;
+ freeMem_manager ( cn_mem_manager );
+ cn_mem_manager = NULL;
}
-CONNECT * allocateCN ( unsigned int contigId, int gap )
+CONNECT *allocateCN ( unsigned int contigId, int gap )
{
- CONNECT * newCN;
- newCN = ( CONNECT * ) getItem ( cn_mem_manager );
- newCN->contigID = contigId;
- newCN->gapLen = gap;
- newCN->minGap = 0;
- newCN->maxGap = 0;
- newCN->bySmall = 0;
- newCN->smallIns = 0;
- newCN->weakPoint = 0;
- newCN->weight = 1;
- newCN->weightNotInherit = 0;
- newCN->mask = 0;
- newCN->used = 0;
- newCN->checking = 0;
- newCN->deleted = 0;
- newCN->prevInScaf = 0;
- newCN->inherit = 0;
- newCN->singleInScaf = 0;
- newCN->nextInScaf = NULL;
- return newCN;
+ CONNECT *newCN;
+ newCN = ( CONNECT * ) getItem ( cn_mem_manager );
+ newCN->contigID = contigId;
+ newCN->gapLen = gap;
+ newCN->minGap = 0;
+ newCN->maxGap = 0;
+ newCN->bySmall = 0;
+ newCN->smallIns = 0;
+ newCN->weakPoint = 0;
+ newCN->weight = 1;
+ newCN->weightNotInherit = 0;
+ newCN->mask = 0;
+ newCN->used = 0;
+ newCN->checking = 0;
+ newCN->deleted = 0;
+ newCN->prevInScaf = 0;
+ newCN->inherit = 0;
+ newCN->singleInScaf = 0;
+ newCN->nextInScaf = NULL;
+ return newCN;
}
-void output_cntGVZ ( char * outfile )
+void output_cntGVZ ( char *outfile )
{
- char name[256];
- FILE * fp;
- unsigned int i;
- CONNECT * connect;
- boolean flag;
- sprintf ( name, "%s.scaffold.gvz", outfile );
- fp = ckopen ( name, "w" );
- fprintf ( fp, "digraph G{\n" );
- fprintf ( fp, "\tsize=\"512,512\";\n" );
-
- for ( i = num_ctg; i > 0; i-- )
- {
- if ( !contig_array[i].downwardConnect )
- {
- continue;
- }
-
- connect = contig_array[i].downwardConnect;
-
- while ( connect )
- {
- if ( connect->deleted )
- {
- connect = connect->next;
- continue;
- }
-
- if ( connect->prevInScaf || connect->nextInScaf )
- {
- flag = 1;
- }
- else
- {
- flag = 0;
- }
-
- if ( !connect->mask )
- fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
- connect->gapLen, flag, connect->weight );
- else
- fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
- connect->gapLen, flag, connect->weight );
-
- connect = connect->next;
- }
- }
-
- fprintf ( fp, "}\n" );
- fclose ( fp );
+ char name[256];
+ FILE *fp;
+ unsigned int i;
+ CONNECT *connect;
+ boolean flag;
+ sprintf ( name, "%s.scaffold.gvz", outfile );
+ fp = ckopen ( name, "w" );
+ fprintf ( fp, "digraph G{\n" );
+ fprintf ( fp, "\tsize=\"512,512\";\n" );
+
+ for ( i = num_ctg; i > 0; i-- )
+ {
+ if ( !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ connect = contig_array[i].downwardConnect;
+
+ while ( connect )
+ {
+ if ( connect->deleted )
+ {
+ connect = connect->next;
+ continue;
+ }
+
+ if ( connect->prevInScaf || connect->nextInScaf )
+ {
+ flag = 1;
+ }
+ else
+ {
+ flag = 0;
+ }
+
+ if ( !connect->mask )
+ fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+ connect->gapLen, flag, connect->weight );
+ else
+ fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+ connect->gapLen, flag, connect->weight );
+
+ connect = connect->next;
+ }
+ }
+
+ fprintf ( fp, "}\n" );
+ fclose ( fp );
}
/***************** below this line all codes are about lookup table *****************/
void createCntLookupTable ()
{
- if ( !cntLookupTable )
- {
- cntLookupTable = ( CONNECT ** ) ckalloc ( ( 3 * num_ctg + 1 ) * sizeof ( CONNECT * ) );
- }
+ if ( !cntLookupTable )
+ {
+ cntLookupTable = ( CONNECT ** ) ckalloc ( ( 3 * num_ctg + 1 ) * sizeof ( CONNECT * ) );
+ }
}
void deleteCntLookupTable ()
{
- if ( cntLookupTable )
- {
- free ( ( void * ) cntLookupTable );
- cntLookupTable = NULL;
- }
+ if ( cntLookupTable )
+ {
+ free ( ( void * ) cntLookupTable );
+ cntLookupTable = NULL;
+ }
}
-void putCnt2LookupTable ( unsigned int from_c, CONNECT * cnt )
+void putCnt2LookupTable ( unsigned int from_c, CONNECT *cnt )
{
- if ( !cnt || !cntLookupTable )
- {
- return;
- }
-
- unsigned int index = 2 * from_c + cnt->contigID;
- cnt->nextInLookupTable = cntLookupTable[index];
- cntLookupTable[index] = cnt;
+ if ( !cnt || !cntLookupTable )
+ {
+ return;
+ }
+
+ unsigned int index = 2 * from_c + cnt->contigID;
+ cnt->nextInLookupTable = cntLookupTable[index];
+ cntLookupTable[index] = cnt;
}
-static CONNECT * getCntInLookupTable ( unsigned int from_c, unsigned int to_c )
+static CONNECT *getCntInLookupTable ( unsigned int from_c, unsigned int to_c )
{
- unsigned int index = 2 * from_c + to_c;
- CONNECT * ite_cnt = cntLookupTable[index];
+ unsigned int index = 2 * from_c + to_c;
+ CONNECT *ite_cnt = cntLookupTable[index];
- while ( ite_cnt )
- {
- if ( ite_cnt->contigID == to_c )
- {
- return ite_cnt;
- }
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->contigID == to_c )
+ {
+ return ite_cnt;
+ }
- ite_cnt = ite_cnt->nextInLookupTable;
- }
+ ite_cnt = ite_cnt->nextInLookupTable;
+ }
- return NULL;
+ return NULL;
}
-CONNECT * getCntBetween ( unsigned int from_c, unsigned int to_c )
+CONNECT *getCntBetween ( unsigned int from_c, unsigned int to_c )
{
- CONNECT * pcnt;
+ CONNECT *pcnt;
- if ( cntLookupTable )
- {
- pcnt = getCntInLookupTable ( from_c, to_c );
- return pcnt;
- }
+ if ( cntLookupTable )
+ {
+ pcnt = getCntInLookupTable ( from_c, to_c );
+ return pcnt;
+ }
- pcnt = contig_array[from_c].downwardConnect;
+ pcnt = contig_array[from_c].downwardConnect;
- while ( pcnt )
- {
- if ( pcnt->contigID == to_c )
- {
- return pcnt;
- }
+ while ( pcnt )
+ {
+ if ( pcnt->contigID == to_c )
+ {
+ return pcnt;
+ }
- pcnt = pcnt->next;
- }
+ pcnt = pcnt->next;
+ }
- return pcnt;
+ return pcnt;
}
/*
diff --git a/standardPregraph/contig.c b/standardPregraph/contig.c
index f1ce229..7fd39d5 100644
--- a/standardPregraph/contig.c
+++ b/standardPregraph/contig.c
@@ -1,7 +1,7 @@
/*
* contig.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -25,7 +25,7 @@
#include "kmerhash.h"
#include "extfunc.h"
#include "extvab.h"
-static void initenv ( int argc, char ** argv );
+static void initenv ( int argc, char **argv );
static void display_contig_usage ();
char shortrdsfile[256], graphfile[256];
static boolean repeatSolve; //whether solve repeat or not
@@ -58,262 +58,274 @@ Output:
Return:
None.
*************************************************/
-int call_heavygraph ( int argc, char ** argv )
+int call_heavygraph ( int argc, char **argv )
{
- time_t start_t, stop_t, time_bef, time_aft;
- time ( &start_t );
- boolean ret;
- fprintf ( stderr, "\n********************\n" );
- fprintf ( stderr, "Contig\n" );
- fprintf ( stderr, "********************\n\n" );
- initenv ( argc, argv );
- loadVertex ( graphfile );
- loadEdge ( graphfile );
-
- if ( repeatSolve )
- {
- ret = loadPathBin ( graphfile );
- }
-
- swapedge();
- sortedge();
- freshArc();
-
- if ( repeatSolve )
- {
- time ( &time_bef );
-
- // ret = loadPathBin (graphfile);
- if ( ret )
- {
- solveReps ();
- }
- else
- {
- fprintf ( stderr, "Repeat solving can't be done...\n" );
- }
-
- time ( &time_aft );
- fprintf ( stderr, "Time spent on solving repeat: %ds.\n", ( int ) ( time_aft - time_bef ) );
- }
-
- //edgecvg_bar(edge_array,num_ed,graphfile,100);
-
- if ( !iter && M > 0 )
- {
- time ( &time_bef );
- bubblePinch ( 0.90, graphfile, M, 0, 1 );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
- }
-
- if ( iter && cleanBubble && M > 0 )
- {
- time ( &time_bef );
- clean = 1;
- long long oldpinCounter = 0;
- long long min = 10;
- int times = 0;
-
- while ( min >= 10 )
- {
- times++;
-
- if ( times >= 4 ) { break; }
-
- bubblePinch ( 0.90, graphfile, M, 1, 0 );
- min = pinCounter;
- fprintf ( stderr, "%lld clean bubbles merged.\n", pinCounter );
- }
-
- time ( &time_aft );
- fprintf ( stderr, "Time spent on pinching clean bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
- clean = 0;
- }
-
- if ( deLowEdge )
- {
- removeWeakEdges ( 2 * overlaplen, 1 );
- removeLowCovEdges ( 2 * overlaplen, deLowEdge, !iter );
- }
-
- cutTipsInGraph ( 0, 0, !iter );
-
- if ( iter )
- {
- Iterate ( shortrdsfile, graphfile, maxk, M ); //keepReadFile,
-
- if ( M > 0 )
- {
- time ( &time_bef );
- bubblePinch ( 0.90, graphfile, M, 1, 0 );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
- }
-
- freshpreGraphBasic ( iter, maxk, graphfile );
- }
-
- //output_graph(graphfile);
- output_contig ( edge_array, num_ed, graphfile, overlaplen + 1 );
- output_updated_edges ( graphfile );
- output_heavyArcs ( graphfile );
-
- if ( vt_array )
- {
- free ( ( void * ) vt_array );
- vt_array = NULL;
- }
-
- if ( edge_array )
- {
- free_edge_array ( edge_array, num_ed_limit );
- edge_array = NULL;
- }
-
- destroyArcMem ();
- time ( &stop_t );
- fprintf ( stderr, "\nTime spent on constructing contig: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
- return 0;
+ time_t start_t, stop_t, time_bef, time_aft;
+ time ( &start_t );
+ boolean ret;
+ fprintf ( stderr, "\n********************\n" );
+ fprintf ( stderr, "Contig\n" );
+ fprintf ( stderr, "********************\n\n" );
+ initenv ( argc, argv );
+ loadVertex ( graphfile );
+ loadEdge ( graphfile );
+
+ if ( repeatSolve )
+ {
+ ret = loadPathBin ( graphfile );
+ }
+
+ swapedge();
+ sortedge();
+ freshArc();
+
+ if ( repeatSolve )
+ {
+ time ( &time_bef );
+
+ // ret = loadPathBin (graphfile);
+ if ( ret )
+ {
+ solveReps ();
+ }
+ else
+ {
+ fprintf ( stderr, "Repeat solving can't be done...\n" );
+ }
+
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on solving repeat: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ //edgecvg_bar(edge_array,num_ed,graphfile,100);
+
+ if ( !iter && M > 0 )
+ {
+ time ( &time_bef );
+ bubblePinch ( 0.90, graphfile, M, 0, 1 );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ if ( iter && cleanBubble && M > 0 )
+ {
+ time ( &time_bef );
+ clean = 1;
+ long long oldpinCounter = 0;
+ long long min = 10;
+ int times = 0;
+
+ while ( min >= 10 )
+ {
+ times++;
+
+ if ( times >= 4 )
+ {
+ break;
+ }
+
+ bubblePinch ( 0.90, graphfile, M, 1, 0 );
+ min = pinCounter;
+ fprintf ( stderr, "%lld clean bubbles merged.\n", pinCounter );
+ }
+
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on pinching clean bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ clean = 0;
+ }
+
+ if ( deLowEdge )
+ {
+ removeWeakEdges ( 2 * overlaplen, 1 );
+ removeLowCovEdges ( 2 * overlaplen, deLowEdge, !iter );
+ }
+
+ cutTipsInGraph ( 0, 0, !iter );
+
+ if ( iter )
+ {
+ Iterate ( shortrdsfile, graphfile, maxk, M ); //keepReadFile,
+
+ if ( M > 0 )
+ {
+ time ( &time_bef );
+ bubblePinch ( 0.90, graphfile, M, 1, 0 );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on pinching bubbles: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ freshpreGraphBasic ( iter, maxk, graphfile );
+ }
+
+ //output_graph(graphfile);
+ output_contig ( edge_array, num_ed, graphfile, overlaplen + 1 );
+ output_updated_edges ( graphfile );
+ output_heavyArcs ( graphfile );
+
+ if ( vt_array )
+ {
+ free ( ( void * ) vt_array );
+ vt_array = NULL;
+ }
+
+ if ( edge_array )
+ {
+ free_edge_array ( edge_array, num_ed_limit );
+ edge_array = NULL;
+ }
+
+ destroyArcMem ();
+ time ( &stop_t );
+ fprintf ( stderr, "\nTime spent on constructing contig: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
+ return 0;
}
/*****************************************************************************
* Parse command line switches
*****************************************************************************/
-void initenv ( int argc, char ** argv )
+void initenv ( int argc, char **argv )
{
- int copt;
- int inpseq, outseq;
- extern char * optarg;
- char temp[100];
- inpseq = outseq = repeatSolve = iter = cleanBubble = 0;//keepReadFile =
- optind = 1;
- fprintf ( stderr, "Parameters: contig " );
-
- while ( ( copt = getopt ( argc, argv, "g:M:D:Rs:m:p:e:E" ) ) != EOF ) // r
- {
- switch ( copt )
- {
- case 'M':
- fprintf ( stderr, "-M %s ", optarg );
- sscanf ( optarg, "%s", temp );
- M = atoi ( temp );
- break;
- case 'D':
- fprintf ( stderr, "-D %s ", optarg );
- sscanf ( optarg, "%s", temp );
- deLowEdge = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
- break;
- case 'g':
- fprintf ( stderr, "-g %s ", optarg );
- inGraph = 1;
- sscanf ( optarg, "%s", graphfile );
- break;
- case 'R':
- repeatSolve = 1;
- fprintf ( stderr, "-R " );
- break;
- case 's':
- fprintf ( stderr, "-s %s ", optarg );
- inpseq = 1;
- sscanf ( optarg, "%s", shortrdsfile );
- break;
- case 'm':
- fprintf ( stderr, "-m %s ", optarg );
- iter = 1;
- sscanf ( optarg, "%s", temp );
- maxk = atoi ( temp );
- break;
- /*
- case 'r':
- keepReadFile = 1;
- fprintf(stderr, "-r ");
- break;
- */
- case 'e':
- fprintf ( stderr, "-e %s ", optarg );
- sscanf ( optarg, "%s", temp );
- arcfilter = atoi ( temp );
- break;
- case 'p':
- fprintf ( stderr, "-p %s ", optarg );
- sscanf ( optarg, "%s", temp );
- thrd_num = atoi ( temp );
- break;
- case 'E':
- cleanBubble = 1;
- fprintf ( stderr, "-E " );
- break;
- default:
-
- if ( ( iter && inpseq == 0 ) || inGraph == 0 )
- {
- display_contig_usage ();
- exit ( -1 );
- }
- }
- }
-
- fprintf ( stderr, "\n\n" );
-
- if ( iter )
- {
- if ( maxk % 2 == 0 )
- {
- maxk++;
- fprintf ( stderr, "Max K should be an odd number, change to %d.\n", maxk );
- }
-
- if ( maxk < 13 )
- {
- maxk = 13;
- fprintf ( stderr, "Max K should not be less than 13, change to %d.\n", maxk );
- }
+ int copt;
+ int inpseq, outseq;
+ extern char *optarg;
+ char temp[100];
+ inpseq = outseq = repeatSolve = iter = cleanBubble = 0;//keepReadFile =
+ optind = 1;
+ fprintf ( stderr, "Parameters: contig " );
+
+ while ( ( copt = getopt ( argc, argv, "g:M:D:Rs:m:p:e:E" ) ) != EOF ) // r
+ {
+ switch ( copt )
+ {
+ case 'M':
+ fprintf ( stderr, "-M %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ M = atoi ( temp );
+ break;
+
+ case 'D':
+ fprintf ( stderr, "-D %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ deLowEdge = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
+ break;
+
+ case 'g':
+ fprintf ( stderr, "-g %s ", optarg );
+ inGraph = 1;
+ sscanf ( optarg, "%s", graphfile );
+ break;
+
+ case 'R':
+ repeatSolve = 1;
+ fprintf ( stderr, "-R " );
+ break;
+
+ case 's':
+ fprintf ( stderr, "-s %s ", optarg );
+ inpseq = 1;
+ sscanf ( optarg, "%s", shortrdsfile );
+ break;
+
+ case 'm':
+ fprintf ( stderr, "-m %s ", optarg );
+ iter = 1;
+ sscanf ( optarg, "%s", temp );
+ maxk = atoi ( temp );
+ break;
+
+ /*
+ case 'r':
+ keepReadFile = 1;
+ fprintf(stderr, "-r ");
+ break;
+ */
+ case 'e':
+ fprintf ( stderr, "-e %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ arcfilter = atoi ( temp );
+ break;
+
+ case 'p':
+ fprintf ( stderr, "-p %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ thrd_num = atoi ( temp );
+ break;
+
+ case 'E':
+ cleanBubble = 1;
+ fprintf ( stderr, "-E " );
+ break;
+
+ default:
+
+ if ( ( iter && inpseq == 0 ) || inGraph == 0 )
+ {
+ display_contig_usage ();
+ exit ( -1 );
+ }
+ }
+ }
+
+ fprintf ( stderr, "\n\n" );
+
+ if ( iter )
+ {
+ if ( maxk % 2 == 0 )
+ {
+ maxk++;
+ fprintf ( stderr, "Max K should be an odd number, change to %d.\n", maxk );
+ }
+
+ if ( maxk < 13 )
+ {
+ maxk = 13;
+ fprintf ( stderr, "Max K should not be less than 13, change to %d.\n", maxk );
+ }
#ifdef MER127
- else if ( maxk > 127 )
- {
- maxk = 127;
- fprintf ( stderr, "Max K should not be greater than 127, change to %d.\n", maxk );
- }
+ else if ( maxk > 127 )
+ {
+ maxk = 127;
+ fprintf ( stderr, "Max K should not be greater than 127, change to %d.\n", maxk );
+ }
#else
- else if ( maxk > 63 )
- {
- maxk = 63;
- fprintf ( stderr, "Max K should not be greater than 63, change to %d.\n", maxk );
- }
+ else if ( maxk > 63 )
+ {
+ maxk = 63;
+ fprintf ( stderr, "Max K should not be greater than 63, change to %d.\n", maxk );
+ }
#endif
- if ( maxk <= overlaplen )
- {
- fprintf ( stderr, "Max K %d is not greater than overlaplen %d.\n", maxk, overlaplen );
- display_contig_usage ();
- exit ( -1 );
- }
- }
-
- if ( ( iter && inpseq == 0 ) || inGraph == 0 )
- {
- display_contig_usage ();
- exit ( -1 );
- }
+ if ( maxk <= overlaplen )
+ {
+ fprintf ( stderr, "Max K %d is not greater than overlaplen %d.\n", maxk, overlaplen );
+ display_contig_usage ();
+ exit ( -1 );
+ }
+ }
+
+ if ( ( iter && inpseq == 0 ) || inGraph == 0 )
+ {
+ display_contig_usage ();
+ exit ( -1 );
+ }
}
static void display_contig_usage ()
{
- fprintf ( stderr, "\ncontig -g InputGraph [-R] [-M mergeLevel -D EdgeCovCutoff] [-s readsInfoFile -m maxkmer -p n_cpu -r]\n" );
- fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
- fprintf ( stderr, " -R (optional) resolve repeats using information generated in pregraph step, works only if -R is set in pregraph step too, [NO]\n" );
- fprintf ( stderr, " -M <int> mergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n" );
- fprintf ( stderr, " -D <int> EdgeCovCutoff: edges shorter than (2*K+1) with coverage no larger than EdgeCovCutoff will be deleted, [1]\n" );
- fprintf ( stderr, " -e <int> arcWeight: two edges, between which the arc's weight is larger than arcWeight, will be linerized, [0]\n" );
- fprintf ( stderr, " -m <int> max k when using multi-kmer, and the parameters below are used along with multi-kmer, [NO]\n" );
- fprintf ( stderr, " -s <string> readsInfoFile:The file contains information of solexa reads(It's necessary when using multi-kmer)\n" );
- fprintf ( stderr, " -p <int> number of cpu, [8]\n" );
- fprintf ( stderr, " -E (optional) merge clean bubble before iterate, works only if -M is set when using multi-kmer, [NO]\n" );
- // fprintf (stderr," -r (optional) keep available read(*.read)\n");
+ fprintf ( stderr, "\ncontig -g InputGraph [-R] [-M mergeLevel -D EdgeCovCutoff] [-s readsInfoFile -m maxkmer -p n_cpu -r]\n" );
+ fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
+ fprintf ( stderr, " -R (optional) resolve repeats using information generated in pregraph step, works only if -R is set in pregraph step too, [NO]\n" );
+ fprintf ( stderr, " -M <int> mergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n" );
+ fprintf ( stderr, " -D <int> EdgeCovCutoff: edges shorter than (2*K+1) with coverage no larger than EdgeCovCutoff will be deleted, [1]\n" );
+ fprintf ( stderr, " -e <int> arcWeight: two edges, between which the arc's weight is larger than arcWeight, will be linerized, [0]\n" );
+ fprintf ( stderr, " -m <int> max k when using multi-kmer, and the parameters below are used along with multi-kmer, [NO]\n" );
+ fprintf ( stderr, " -s <string> readsInfoFile:The file contains information of solexa reads(It's necessary when using multi-kmer)\n" );
+ fprintf ( stderr, " -p <int> number of cpu, [8]\n" );
+ fprintf ( stderr, " -E (optional) merge clean bubble before iterate, works only if -M is set when using multi-kmer, [NO]\n" );
+ // fprintf (stderr," -r (optional) keep available read(*.read)\n");
}
diff --git a/standardPregraph/cutTipPreGraph.c b/standardPregraph/cutTipPreGraph.c
index b1fb2c8..e1996ff 100644
--- a/standardPregraph/cutTipPreGraph.c
+++ b/standardPregraph/cutTipPreGraph.c
@@ -1,7 +1,7 @@
/*
* cutTipPreGraph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,11 +27,11 @@
#include "extvab.h"
static int tip_c; //tips counter
-static long long * linearCounter; //counter for linear kmer node number
+static long long *linearCounter; //counter for linear kmer node number
static void Mark1in1outNode ();
-static void thread_mark ( KmerSet * set, unsigned char thrdID );
+static void thread_mark ( KmerSet *set, unsigned char thrdID );
/*
static void printKmer(Kmer kmer)
@@ -40,309 +40,309 @@ static void printKmer(Kmer kmer)
printf("\n");
}
*/
-static int clipTipFromNode ( kmer_t * node1, int cut_len, boolean THIN )
+static int clipTipFromNode ( kmer_t *node1, int cut_len, boolean THIN )
{
- unsigned char ret = 0, in_num, out_num, link;
- int sum, count;
- kmer_t * out_node;
- Kmer tempKmer, pre_word, word, bal_word;
- ubyte8 hash_ban;
- char ch1, ch;
- boolean smaller, found;
- int setPicker;
- unsigned int max_links, singleCvg;
- /*
- if (node1->linear || node1->deleted)
- {
- return ret;
- }
-
- if (THIN && !node1->single)
- {
- return ret;
- }
- */
- in_num = count_branch2prev ( node1 );
- out_num = count_branch2next ( node1 );
-
- if ( in_num == 0 && out_num == 1 )
- {
- pre_word = node1->seq;
-
- for ( ch1 = 0; ch1 < 4; ch1++ )
- {
- link = get_kmer_right_cov ( *node1, ch1 );
-
- if ( link )
- {
- break;
- }
- }
-
- word = nextKmer ( pre_word, ch1 );
- }
- else if ( in_num == 1 && out_num == 0 )
- {
- pre_word = reverseComplement ( node1->seq, overlaplen );
-
- for ( ch1 = 0; ch1 < 4; ch1++ )
- {
- link = get_kmer_left_cov ( *node1, ch1 );
-
- if ( link )
- {
- break;
- }
- }
-
- word = nextKmer ( pre_word, int_comp ( ch1 ) );
- }
- else
- {
- return ret;
- }
-
- count = 1;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &out_node );
-
- if ( !found )
- {
- fprintf ( stderr, "Kmer " );
- PrintKmer ( stderr, word );
- fprintf ( stderr, " is not found, node1 " );
- PrintKmer ( stderr, node1->seq );
- fprintf ( stderr, " .\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
- node1->seq.low2);
- #else
- fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
- #endif
- */
- exit ( 1 );
- }
-
- while ( out_node->linear )
- {
- count++;
-
- if ( THIN && !out_node->single )
- {
- break;
- }
-
- if ( count > cut_len )
- {
- return ret;
- }
-
- if ( smaller )
- {
- pre_word = word;
-
- for ( ch = 0; ch < 4; ch++ )
- {
- link = get_kmer_right_cov ( *out_node, ch );
-
- if ( link )
- {
- break;
- }
- }
-
- word = nextKmer ( pre_word, ch );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &out_node );
-
- if ( !found )
- {
- fprintf ( stderr, "Kmer " );
- PrintKmer ( stderr, word );
- fprintf ( stderr, " is not found, node1 " );
- PrintKmer ( stderr, node1->seq );
- fprintf ( stderr, " .\n" );
- fprintf ( stderr, "Pre_word " );
- PrintKmer ( stderr, pre_word );
- fprintf ( stderr, " with %d(smaller).\n", ch );
- /*
- #ifdef MER127
- fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
- node1->seq.low2);
- fprintf (stderr,"pre_word %llx%llx%llx%llx with %d(smaller)\n", pre_word.high1, pre_word.low1, pre_word.high2, pre_word.low2, ch);
- #else
- fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
- fprintf (stderr,"pre_word %llx%llx with %d(smaller)\n", pre_word.high, pre_word.low, ch);
- #endif
- */
- exit ( 1 );
- }
- }
- else
- {
- pre_word = bal_word;
-
- for ( ch = 0; ch < 4; ch++ )
- {
- link = get_kmer_left_cov ( *out_node, ch );
-
- if ( link )
- {
- break;
- }
- }
-
- word = nextKmer ( pre_word, int_comp ( ch ) );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &out_node );
-
- if ( !found )
- {
- fprintf ( stderr, "Kmer " );
- PrintKmer ( stderr, word );
- fprintf ( stderr, " is not found, node1 " );
- PrintKmer ( stderr, node1->seq );
- fprintf ( stderr, " .\n" );
- fprintf ( stderr, "Pre_word " );
- PrintKmer ( stderr, reverseComplement ( pre_word, overlaplen ) );
- fprintf ( stderr, " with %d(larger).\n", int_comp ( ch ) );
- /*
- #ifdef MER127
- fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
- node1->seq.low2);
- fprintf (stderr,"pre_word %llx%llx%llx%llx with %d(larger)\n", reverseComplement (pre_word, overlaplen).high1, reverseComplement (pre_word, overlaplen).low1,
- reverseComplement (pre_word, overlaplen).high2, reverseComplement (pre_word, overlaplen).low2, int_comp (ch));
- #else
- fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
- fprintf (stderr,"pre_word %llx%llx with %d(larger)\n", reverseComplement (pre_word, overlaplen).high, reverseComplement (pre_word, overlaplen).low, int_comp (ch));
- #endif
- */
- exit ( 1 );
- }
- }
- }
-
- if ( ( sum = count_branch2next ( out_node ) + count_branch2prev ( out_node ) ) == 1 )
- {
- tip_c++;
- node1->deleted = 1;
- out_node->deleted = 1;
- return 1;
- }
- else
- {
- ch = firstCharInKmer ( pre_word );
-
- if ( THIN )
- {
- tip_c++;
- node1->deleted = 1;
- dislink2prevUncertain ( out_node, ch, smaller );
- out_node->linear = 0;
- return 1;
- }
-
- // make sure this tip doesn't provide most links to out_node
- max_links = 0;
-
- for ( ch1 = 0; ch1 < 4; ch1++ )
- {
- if ( smaller )
- {
- singleCvg = get_kmer_left_cov ( *out_node, ch1 );
-
- if ( singleCvg > max_links )
- {
- max_links = singleCvg;
- }
- }
- else
- {
- singleCvg = get_kmer_right_cov ( *out_node, ch1 );
-
- if ( singleCvg > max_links )
- {
- max_links = singleCvg;
- }
- }
- }
-
- if ( smaller && get_kmer_left_cov ( *out_node, ch ) < max_links )
- {
- tip_c++;
- node1->deleted = 1;
- dislink2prevUncertain ( out_node, ch, smaller );
-
- if ( count_branch2prev ( out_node ) == 1 && count_branch2next ( out_node ) == 1 )
- {
- out_node->linear = 1;
- }
-
- return 1;
- }
-
- if ( !smaller && get_kmer_right_cov ( *out_node, int_comp ( ch ) ) < max_links )
- {
- tip_c++;
- node1->deleted = 1;
- dislink2prevUncertain ( out_node, ch, smaller );
-
- if ( count_branch2prev ( out_node ) == 1 && count_branch2next ( out_node ) == 1 )
- {
- out_node->linear = 1;
- }
-
- return 1;
- }
- }
-
- return 0;
+ unsigned char ret = 0, in_num, out_num, link;
+ int sum, count;
+ kmer_t *out_node;
+ Kmer tempKmer, pre_word, word, bal_word;
+ ubyte8 hash_ban;
+ char ch1, ch;
+ boolean smaller, found;
+ int setPicker;
+ unsigned int max_links, singleCvg;
+ /*
+ if (node1->linear || node1->deleted)
+ {
+ return ret;
+ }
+
+ if (THIN && !node1->single)
+ {
+ return ret;
+ }
+ */
+ in_num = count_branch2prev ( node1 );
+ out_num = count_branch2next ( node1 );
+
+ if ( in_num == 0 && out_num == 1 )
+ {
+ pre_word = node1->seq;
+
+ for ( ch1 = 0; ch1 < 4; ch1++ )
+ {
+ link = get_kmer_right_cov ( *node1, ch1 );
+
+ if ( link )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( pre_word, ch1 );
+ }
+ else if ( in_num == 1 && out_num == 0 )
+ {
+ pre_word = reverseComplement ( node1->seq, overlaplen );
+
+ for ( ch1 = 0; ch1 < 4; ch1++ )
+ {
+ link = get_kmer_left_cov ( *node1, ch1 );
+
+ if ( link )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( pre_word, int_comp ( ch1 ) );
+ }
+ else
+ {
+ return ret;
+ }
+
+ count = 1;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &out_node );
+
+ if ( !found )
+ {
+ fprintf ( stderr, "Kmer " );
+ PrintKmer ( stderr, word );
+ fprintf ( stderr, " is not found, node1 " );
+ PrintKmer ( stderr, node1->seq );
+ fprintf ( stderr, " .\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
+ node1->seq.low2);
+ #else
+ fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
+ #endif
+ */
+ exit ( 1 );
+ }
+
+ while ( out_node->linear )
+ {
+ count++;
+
+ if ( THIN && !out_node->single )
+ {
+ break;
+ }
+
+ if ( count > cut_len )
+ {
+ return ret;
+ }
+
+ if ( smaller )
+ {
+ pre_word = word;
+
+ for ( ch = 0; ch < 4; ch++ )
+ {
+ link = get_kmer_right_cov ( *out_node, ch );
+
+ if ( link )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( pre_word, ch );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &out_node );
+
+ if ( !found )
+ {
+ fprintf ( stderr, "Kmer " );
+ PrintKmer ( stderr, word );
+ fprintf ( stderr, " is not found, node1 " );
+ PrintKmer ( stderr, node1->seq );
+ fprintf ( stderr, " .\n" );
+ fprintf ( stderr, "Pre_word " );
+ PrintKmer ( stderr, pre_word );
+ fprintf ( stderr, " with %d(smaller).\n", ch );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
+ node1->seq.low2);
+ fprintf (stderr,"pre_word %llx%llx%llx%llx with %d(smaller)\n", pre_word.high1, pre_word.low1, pre_word.high2, pre_word.low2, ch);
+ #else
+ fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
+ fprintf (stderr,"pre_word %llx%llx with %d(smaller)\n", pre_word.high, pre_word.low, ch);
+ #endif
+ */
+ exit ( 1 );
+ }
+ }
+ else
+ {
+ pre_word = bal_word;
+
+ for ( ch = 0; ch < 4; ch++ )
+ {
+ link = get_kmer_left_cov ( *out_node, ch );
+
+ if ( link )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( pre_word, int_comp ( ch ) );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &out_node );
+
+ if ( !found )
+ {
+ fprintf ( stderr, "Kmer " );
+ PrintKmer ( stderr, word );
+ fprintf ( stderr, " is not found, node1 " );
+ PrintKmer ( stderr, node1->seq );
+ fprintf ( stderr, " .\n" );
+ fprintf ( stderr, "Pre_word " );
+ PrintKmer ( stderr, reverseComplement ( pre_word, overlaplen ) );
+ fprintf ( stderr, " with %d(larger).\n", int_comp ( ch ) );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"kmer %llx%llx%llx%llx not found, node1 %llx%llx%llx%llx\n", word.high1, word.low1, word.high2, word.low2, node1->seq.high1, node1->seq.low1, node1->seq.high2,
+ node1->seq.low2);
+ fprintf (stderr,"pre_word %llx%llx%llx%llx with %d(larger)\n", reverseComplement (pre_word, overlaplen).high1, reverseComplement (pre_word, overlaplen).low1,
+ reverseComplement (pre_word, overlaplen).high2, reverseComplement (pre_word, overlaplen).low2, int_comp (ch));
+ #else
+ fprintf (stderr,"kmer %llx%llx not found, node1 %llx%llx\n", word.high, word.low, node1->seq.high, node1->seq.low);
+ fprintf (stderr,"pre_word %llx%llx with %d(larger)\n", reverseComplement (pre_word, overlaplen).high, reverseComplement (pre_word, overlaplen).low, int_comp (ch));
+ #endif
+ */
+ exit ( 1 );
+ }
+ }
+ }
+
+ if ( ( sum = count_branch2next ( out_node ) + count_branch2prev ( out_node ) ) == 1 )
+ {
+ tip_c++;
+ node1->deleted = 1;
+ out_node->deleted = 1;
+ return 1;
+ }
+ else
+ {
+ ch = firstCharInKmer ( pre_word );
+
+ if ( THIN )
+ {
+ tip_c++;
+ node1->deleted = 1;
+ dislink2prevUncertain ( out_node, ch, smaller );
+ out_node->linear = 0;
+ return 1;
+ }
+
+ // make sure this tip doesn't provide most links to out_node
+ max_links = 0;
+
+ for ( ch1 = 0; ch1 < 4; ch1++ )
+ {
+ if ( smaller )
+ {
+ singleCvg = get_kmer_left_cov ( *out_node, ch1 );
+
+ if ( singleCvg > max_links )
+ {
+ max_links = singleCvg;
+ }
+ }
+ else
+ {
+ singleCvg = get_kmer_right_cov ( *out_node, ch1 );
+
+ if ( singleCvg > max_links )
+ {
+ max_links = singleCvg;
+ }
+ }
+ }
+
+ if ( smaller && get_kmer_left_cov ( *out_node, ch ) < max_links )
+ {
+ tip_c++;
+ node1->deleted = 1;
+ dislink2prevUncertain ( out_node, ch, smaller );
+
+ if ( count_branch2prev ( out_node ) == 1 && count_branch2next ( out_node ) == 1 )
+ {
+ out_node->linear = 1;
+ }
+
+ return 1;
+ }
+
+ if ( !smaller && get_kmer_right_cov ( *out_node, int_comp ( ch ) ) < max_links )
+ {
+ tip_c++;
+ node1->deleted = 1;
+ dislink2prevUncertain ( out_node, ch, smaller );
+
+ if ( count_branch2prev ( out_node ) == 1 && count_branch2next ( out_node ) == 1 )
+ {
+ out_node->linear = 1;
+ }
+
+ return 1;
+ }
+ }
+
+ return 0;
}
@@ -362,40 +362,40 @@ Return:
void removeSingleTips ()
{
- int i, flag = 0, cut_len_tip;
- kmer_t * rs;
- KmerSet * set;
- //count_ends(hash_table);
- cut_len_tip = 2 * overlaplen; // >= maxReadLen4all-overlaplen+1 ? 2*overlaplen : maxReadLen4all-overlaplen+1;
- //if(cut_len_tip > 100) cut_len_tip = 100;
- fprintf ( stderr, "Start to remove frequency-one-kmer tips shorter than %d.\n", cut_len_tip );
- tip_c = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- set = KmerSets[i];
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- rs = set->array + set->iter_ptr;
-
- if ( !rs->linear && !rs->deleted && rs->single )
- {
- flag += clipTipFromNode ( rs, cut_len_tip, 1 );
- }
-
- // flag += clipTipFromNode (rs, cut_len_tip, 1);
- }
-
- set->iter_ptr++;
- }
- }
-
- fprintf ( stderr, "Total %d tip(s) removed.\n", tip_c );
- Mark1in1outNode ();
+ int i, flag = 0, cut_len_tip;
+ kmer_t *rs;
+ KmerSet *set;
+ //count_ends(hash_table);
+ cut_len_tip = 2 * overlaplen; // >= maxReadLen4all-overlaplen+1 ? 2*overlaplen : maxReadLen4all-overlaplen+1;
+ //if(cut_len_tip > 100) cut_len_tip = 100;
+ fprintf ( stderr, "Start to remove frequency-one-kmer tips shorter than %d.\n", cut_len_tip );
+ tip_c = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ set = KmerSets[i];
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ rs = set->array + set->iter_ptr;
+
+ if ( !rs->linear && !rs->deleted && rs->single )
+ {
+ flag += clipTipFromNode ( rs, cut_len_tip, 1 );
+ }
+
+ // flag += clipTipFromNode (rs, cut_len_tip, 1);
+ }
+
+ set->iter_ptr++;
+ }
+ }
+
+ fprintf ( stderr, "Total %d tip(s) removed.\n", tip_c );
+ Mark1in1outNode ();
}
@@ -413,227 +413,227 @@ Return:
*************************************************/
void removeMinorTips ()
{
- int i, flag = 0, cut_len_tip;
- kmer_t * rs;
- KmerSet * set;
- //count_ends(hash_table);
- //cut_len_tip = 2*overlaplen >= maxReadLen4all-overlaplen+1 ? 2*overlaplen : maxReadLen4all-overlaplen+1;
- cut_len_tip = 2 * overlaplen;
- //if(cut_len_tip > 100) cut_len_tip = 100;
- fprintf ( stderr, "Start to remove tips with minority links.\n" );
- tip_c = 0;
- flag = 1;
- int round = 1;
-
- while ( flag )
- {
- flag = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- set = KmerSets[i];
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- rs = set->array + set->iter_ptr;
-
- if ( !rs->linear && !rs->deleted )
- {
- flag += clipTipFromNode ( rs, cut_len_tip, 0 );
- }
-
- // flag += clipTipFromNode (rs, cut_len_tip, 0);
- }
-
- set->iter_ptr++;
- }
-
- // fprintf (stderr,"Remove minor tips in kmer set %d is done.\n", i);
- }
-
- fprintf ( stderr, "%d tip(s) removed in cycle %d.\n", flag, round++ );
- }
-
- /*
- for (i = 0; i < thrd_num; i++)
- {
- set = KmerSets[i];
- flag = 1;
-
- while (flag)
- {
- flag = 0;
- set->iter_ptr = 0;
-
- while (set->iter_ptr < set->size)
- {
- if (!is_kmer_entity_null (set->flags, set->iter_ptr))
- {
- rs = set->array + set->iter_ptr;
- flag += clipTipFromNode (rs, cut_len_tip, 0);
- }
-
- set->iter_ptr++;
- }
- }
-
- fprintf (stderr,"Remove minor tips in kmer set %d is done.\n", i);
- }
- */
- fprintf ( stderr, "Total %d tip(s) removed.\n", tip_c );
- Mark1in1outNode ();
+ int i, flag = 0, cut_len_tip;
+ kmer_t *rs;
+ KmerSet *set;
+ //count_ends(hash_table);
+ //cut_len_tip = 2*overlaplen >= maxReadLen4all-overlaplen+1 ? 2*overlaplen : maxReadLen4all-overlaplen+1;
+ cut_len_tip = 2 * overlaplen;
+ //if(cut_len_tip > 100) cut_len_tip = 100;
+ fprintf ( stderr, "Start to remove tips with minority links.\n" );
+ tip_c = 0;
+ flag = 1;
+ int round = 1;
+
+ while ( flag )
+ {
+ flag = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ set = KmerSets[i];
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ rs = set->array + set->iter_ptr;
+
+ if ( !rs->linear && !rs->deleted )
+ {
+ flag += clipTipFromNode ( rs, cut_len_tip, 0 );
+ }
+
+ // flag += clipTipFromNode (rs, cut_len_tip, 0);
+ }
+
+ set->iter_ptr++;
+ }
+
+ // fprintf (stderr,"Remove minor tips in kmer set %d is done.\n", i);
+ }
+
+ fprintf ( stderr, "%d tip(s) removed in cycle %d.\n", flag, round++ );
+ }
+
+ /*
+ for (i = 0; i < thrd_num; i++)
+ {
+ set = KmerSets[i];
+ flag = 1;
+
+ while (flag)
+ {
+ flag = 0;
+ set->iter_ptr = 0;
+
+ while (set->iter_ptr < set->size)
+ {
+ if (!is_kmer_entity_null (set->flags, set->iter_ptr))
+ {
+ rs = set->array + set->iter_ptr;
+ flag += clipTipFromNode (rs, cut_len_tip, 0);
+ }
+
+ set->iter_ptr++;
+ }
+ }
+
+ fprintf (stderr,"Remove minor tips in kmer set %d is done.\n", i);
+ }
+ */
+ fprintf ( stderr, "Total %d tip(s) removed.\n", tip_c );
+ Mark1in1outNode ();
}
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 2 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
- else if ( * ( prm->selfSignal ) == 1 )
- {
- thread_mark ( KmerSets[id], id );
- * ( prm->selfSignal ) = 0;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 2 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+ else if ( * ( prm->selfSignal ) == 1 )
+ {
+ thread_mark ( KmerSets[id], id );
+ * ( prm->selfSignal ) = 0;
+ }
+
+ usleep ( 1 );
+ }
}
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void thread_mark ( KmerSet * set, unsigned char thrdID )
+static void thread_mark ( KmerSet *set, unsigned char thrdID )
{
- int in_num, out_num;
- kmer_t * rs;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- rs = set->array + set->iter_ptr;
-
- if ( rs->deleted || rs->linear )
- {
- set->iter_ptr++;
- continue;;
- }
-
- in_num = count_branch2prev ( rs );
- out_num = count_branch2next ( rs );
-
- if ( in_num == 1 && out_num == 1 )
- {
- rs->linear = 1;
- linearCounter[thrdID]++;
- }
- }
-
- set->iter_ptr++;
- }
-
- //printf("%lld more linear\n",linearCounter[thrdID]);
+ int in_num, out_num;
+ kmer_t *rs;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ rs = set->array + set->iter_ptr;
+
+ if ( rs->deleted || rs->linear )
+ {
+ set->iter_ptr++;
+ continue;;
+ }
+
+ in_num = count_branch2prev ( rs );
+ out_num = count_branch2next ( rs );
+
+ if ( in_num == 1 && out_num == 1 )
+ {
+ rs->linear = 1;
+ linearCounter[thrdID]++;
+ }
+ }
+
+ set->iter_ptr++;
+ }
+
+ //printf("%lld more linear\n",linearCounter[thrdID]);
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
static void Mark1in1outNode ()
{
- int i;
- long long counter = 0;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
-
- for ( i = 0; i < thrd_num; i++ )
- {
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- thrdSignal[0] = 0;
- linearCounter = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- linearCounter[i] = 0;
- }
-
- sendWorkSignal ( 1, thrdSignal ); //mark linear nodes
- sendWorkSignal ( 2, thrdSignal ); //stop threads
- thread_wait ( threads );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- counter += linearCounter[i];
- }
-
- free ( ( void * ) linearCounter );
- fprintf ( stderr, "%lld linear node(s) marked.\n", counter );
+ int i;
+ long long counter = 0;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ thrdSignal[0] = 0;
+ linearCounter = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ linearCounter[i] = 0;
+ }
+
+ sendWorkSignal ( 1, thrdSignal ); //mark linear nodes
+ sendWorkSignal ( 2, thrdSignal ); //stop threads
+ thread_wait ( threads );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ counter += linearCounter[i];
+ }
+
+ free ( ( void * ) linearCounter );
+ fprintf ( stderr, "%lld linear node(s) marked.\n", counter );
}
diff --git a/standardPregraph/cutTip_graph.c b/standardPregraph/cutTip_graph.c
index 651c1f8..5e39662 100644
--- a/standardPregraph/cutTip_graph.c
+++ b/standardPregraph/cutTip_graph.c
@@ -1,7 +1,7 @@
/*
* cutTip_graph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -42,38 +42,38 @@ Return:
*************************************************/
void destroyEdge ( unsigned int edgeid )
{
- unsigned int bal_ed = getTwinEdge ( edgeid );
- ARC * arc;
-
- if ( bal_ed == edgeid )
- {
- edge_array[edgeid].length = 0;
- return;
- }
-
- arc = edge_array[edgeid].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = 0;
- arc = arc->next;
- }
-
- arc = edge_array[bal_ed].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = 0;
- arc = arc->next;
- }
-
- edge_array[edgeid].arcs = NULL;
- edge_array[bal_ed].arcs = NULL;
- edge_array[edgeid].length = 0;
- edge_array[bal_ed].length = 0;
- edge_array[edgeid].deleted = 1;
- edge_array[bal_ed].deleted = 1;
- //printf("Destroyed %d and %d\n",edgeid,bal_ed);
+ unsigned int bal_ed = getTwinEdge ( edgeid );
+ ARC *arc;
+
+ if ( bal_ed == edgeid )
+ {
+ edge_array[edgeid].length = 0;
+ return;
+ }
+
+ arc = edge_array[edgeid].arcs;
+
+ while ( arc )
+ {
+ arc->bal_arc->to_ed = 0;
+ arc = arc->next;
+ }
+
+ arc = edge_array[bal_ed].arcs;
+
+ while ( arc )
+ {
+ arc->bal_arc->to_ed = 0;
+ arc = arc->next;
+ }
+
+ edge_array[edgeid].arcs = NULL;
+ edge_array[bal_ed].arcs = NULL;
+ edge_array[edgeid].length = 0;
+ edge_array[bal_ed].length = 0;
+ edge_array[edgeid].deleted = 1;
+ edge_array[bal_ed].deleted = 1;
+ //printf("Destroyed %d and %d\n",edgeid,bal_ed);
}
/*************************************************
@@ -89,35 +89,35 @@ Output:
Return:
The first arc of the edge.
*************************************************/
-ARC * arcCount ( unsigned int edgeid, unsigned int * num )
+ARC *arcCount ( unsigned int edgeid, unsigned int *num )
{
- ARC * arc;
- ARC * firstValidArc = NULL;
- unsigned int count = 0;
- arc = edge_array[edgeid].arcs;
-
- while ( arc )
- {
- if ( arc->to_ed > 0 )
- {
- count++;
-
- if ( count == 1 )
- {
- firstValidArc = arc;
- }
- else if ( count > 1 )
- {
- *num = count;
- return firstValidArc;
- }
- }
-
- arc = arc->next;
- }
-
- *num = count;
- return firstValidArc;
+ ARC *arc;
+ ARC *firstValidArc = NULL;
+ unsigned int count = 0;
+ arc = edge_array[edgeid].arcs;
+
+ while ( arc )
+ {
+ if ( arc->to_ed > 0 )
+ {
+ count++;
+
+ if ( count == 1 )
+ {
+ firstValidArc = arc;
+ }
+ else if ( count > 1 )
+ {
+ *num = count;
+ return firstValidArc;
+ }
+ }
+
+ arc = arc->next;
+ }
+
+ *num = count;
+ return firstValidArc;
}
/* multiplicity < multiCutoff
@@ -139,53 +139,53 @@ Return:
*************************************************/
void removeWeakEdges ( int lenCutoff, unsigned int multiCutoff )
{
- unsigned int bal_ed;
- unsigned int arcRight_n, arcLeft_n;
- ARC * arcLeft, *arcRight;
- unsigned int i;
- int counter = 0;
- int round = 1;
- fprintf ( stderr, "Start to destroy weak inner edges.\n" );
- counter = 1;
-
- while ( counter )
- {
- counter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin ( i ) )
- {
- continue;
- }
-
- bal_ed = getTwinEdge ( i );
- arcRight = arcCount ( i, &arcRight_n );
-
- if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
- {
- continue;
- }
-
- arcLeft = arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
- {
- continue;
- }
-
- destroyEdge ( i );
- counter++;
- }
-
- fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
- }
-
- removeDeadArcs ();
- /*
- linearConcatenate();
- compactEdgeArray();
- */
+ unsigned int bal_ed;
+ unsigned int arcRight_n, arcLeft_n;
+ ARC *arcLeft, *arcRight;
+ unsigned int i;
+ int counter = 0;
+ int round = 1;
+ fprintf ( stderr, "Start to destroy weak inner edges.\n" );
+ counter = 1;
+
+ while ( counter )
+ {
+ counter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted || edge_array[i].length == 0 || edge_array[i].length > lenCutoff || EdSameAsTwin ( i ) )
+ {
+ continue;
+ }
+
+ bal_ed = getTwinEdge ( i );
+ arcRight = arcCount ( i, &arcRight_n );
+
+ if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
+ {
+ continue;
+ }
+
+ arcLeft = arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
+ {
+ continue;
+ }
+
+ destroyEdge ( i );
+ counter++;
+ }
+
+ fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
+ }
+
+ removeDeadArcs ();
+ /*
+ linearConcatenate();
+ compactEdgeArray();
+ */
}
/*
@@ -221,36 +221,36 @@ Return:
*************************************************/
void removeLowCovEdges ( int lenCutoff, unsigned short covCutoff, boolean last )
{
- unsigned int bal_ed;
- unsigned int arcRight_n, arcLeft_n;
- ARC * arcLeft, *arcRight;
- unsigned int i;
- int counter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted || edge_array[i].cvg == 0 || edge_array[i].cvg > covCutoff * 10 || edge_array[i].length >= lenCutoff || EdSameAsTwin ( i ) || edge_array[i].length == 0 )
- {
- continue;
- }
-
- bal_ed = getTwinEdge ( i );
- arcRight = arcCount ( i, &arcRight_n );
- arcLeft = arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n < 1 || arcRight_n < 1 )
- {
- continue;
- }
-
- destroyEdge ( i );
- counter++;
- }
-
- fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
- removeDeadArcs ();
- linearConcatenate ( 0, last );
- compactEdgeArray ();
+ unsigned int bal_ed;
+ unsigned int arcRight_n, arcLeft_n;
+ ARC *arcLeft, *arcRight;
+ unsigned int i;
+ int counter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted || edge_array[i].cvg == 0 || edge_array[i].cvg > covCutoff * 10 || edge_array[i].length >= lenCutoff || EdSameAsTwin ( i ) || edge_array[i].length == 0 )
+ {
+ continue;
+ }
+
+ bal_ed = getTwinEdge ( i );
+ arcRight = arcCount ( i, &arcRight_n );
+ arcLeft = arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n < 1 || arcRight_n < 1 )
+ {
+ continue;
+ }
+
+ destroyEdge ( i );
+ counter++;
+ }
+
+ fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
+ removeDeadArcs ();
+ linearConcatenate ( 0, last );
+ compactEdgeArray ();
}
/*************************************************
@@ -269,208 +269,208 @@ Return:
*************************************************/
boolean isUnreliableTip ( unsigned int edgeid, int cutLen, boolean strict )
{
- unsigned int arcRight_n, arcLeft_n;
- unsigned int bal_ed;
- unsigned int currentEd = edgeid;
- int length = 0;
- unsigned int mult = 0;
- ARC * arc, *activeArc = NULL, *tempArc;
-
- if ( edgeid == 0 )
- {
- return 0;
- }
-
- bal_ed = getTwinEdge ( edgeid );
-
- if ( bal_ed == edgeid )
- {
- return 0;
- }
-
- arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 0 )
- {
- return 0;
- }
-
- while ( currentEd )
- {
- arcCount ( bal_ed, &arcLeft_n );
- tempArc = arcCount ( currentEd, &arcRight_n );
-
- if ( arcLeft_n > 1 || arcRight_n > 1 )
- {
- break;
- }
-
- length += edge_array[currentEd].length;
-
- if ( tempArc )
- {
- activeArc = tempArc;
- currentEd = activeArc->to_ed;
- bal_ed = getTwinEdge ( currentEd );
- }
- else
- {
- currentEd = 0;
- }
- }
-
- if ( length >= cutLen )
- {
- return 0;
- }
-
- if ( currentEd == 0 )
- {
- caseB++;
- return 1;
- }
-
- if ( !strict )
- {
- if ( arcLeft_n < 2 )
- {
- length += edge_array[currentEd].length;
- }
-
- if ( length >= cutLen )
- {
- return 0;
- }
- else
- {
- caseC++;
- return 1;
- }
- }
-
- if ( arcLeft_n < 2 )
- {
- return 0;
- }
-
- if ( !activeArc )
- {
- fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
- }
-
- if ( activeArc->multiplicity == 1 )
- {
- caseD++;
- return 1;
- }
-
- for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
- if ( arc->multiplicity > mult )
- {
- mult = arc->multiplicity;
- }
-
- if ( mult > activeArc->multiplicity )
- {
- caseE++;
- }
-
- return mult > activeArc->multiplicity;
+ unsigned int arcRight_n, arcLeft_n;
+ unsigned int bal_ed;
+ unsigned int currentEd = edgeid;
+ int length = 0;
+ unsigned int mult = 0;
+ ARC *arc, *activeArc = NULL, *tempArc;
+
+ if ( edgeid == 0 )
+ {
+ return 0;
+ }
+
+ bal_ed = getTwinEdge ( edgeid );
+
+ if ( bal_ed == edgeid )
+ {
+ return 0;
+ }
+
+ arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 0 )
+ {
+ return 0;
+ }
+
+ while ( currentEd )
+ {
+ arcCount ( bal_ed, &arcLeft_n );
+ tempArc = arcCount ( currentEd, &arcRight_n );
+
+ if ( arcLeft_n > 1 || arcRight_n > 1 )
+ {
+ break;
+ }
+
+ length += edge_array[currentEd].length;
+
+ if ( tempArc )
+ {
+ activeArc = tempArc;
+ currentEd = activeArc->to_ed;
+ bal_ed = getTwinEdge ( currentEd );
+ }
+ else
+ {
+ currentEd = 0;
+ }
+ }
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+
+ if ( currentEd == 0 )
+ {
+ caseB++;
+ return 1;
+ }
+
+ if ( !strict )
+ {
+ if ( arcLeft_n < 2 )
+ {
+ length += edge_array[currentEd].length;
+ }
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+ else
+ {
+ caseC++;
+ return 1;
+ }
+ }
+
+ if ( arcLeft_n < 2 )
+ {
+ return 0;
+ }
+
+ if ( !activeArc )
+ {
+ fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
+ }
+
+ if ( activeArc->multiplicity == 1 )
+ {
+ caseD++;
+ return 1;
+ }
+
+ for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
+ if ( arc->multiplicity > mult )
+ {
+ mult = arc->multiplicity;
+ }
+
+ if ( mult > activeArc->multiplicity )
+ {
+ caseE++;
+ }
+
+ return mult > activeArc->multiplicity;
}
boolean isUnreliableTip_strict ( unsigned int edgeid, int cutLen )
{
- unsigned int arcRight_n, arcLeft_n;
- unsigned int bal_ed;
- unsigned int currentEd = edgeid;
- int length = 0;
- unsigned int mult = 0;
- ARC * arc, *activeArc = NULL, *tempArc;
-
- if ( edgeid == 0 )
- {
- return 0;
- }
-
- bal_ed = getTwinEdge ( edgeid );
-
- if ( bal_ed == edgeid )
- {
- return 0;
- }
-
- arcCount ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 0 )
- {
- return 0;
- }
-
- while ( currentEd )
- {
- arcCount ( bal_ed, &arcLeft_n );
- tempArc = arcCount ( currentEd, &arcRight_n );
-
- if ( arcLeft_n > 1 || arcRight_n > 1 )
- {
- if ( arcLeft_n == 0 || length == 0 )
- {
- return 0;
- }
- else
- {
- break;
- }
- }
-
- length += edge_array[currentEd].length;
-
- if ( length >= cutLen )
- {
- return 0;
- }
-
- if ( tempArc )
- {
- activeArc = tempArc;
- currentEd = activeArc->to_ed;
- bal_ed = getTwinEdge ( currentEd );
- }
- else
- {
- currentEd = 0;
- }
- }
-
- if ( currentEd == 0 )
- {
- caseA++;
- return 1;
- }
-
- if ( !activeArc )
- {
- fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
- }
-
- if ( activeArc->multiplicity == 1 )
- {
- caseB++;
- return 1;
- }
-
- for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
- if ( arc->multiplicity > mult )
- {
- mult = arc->multiplicity;
- }
-
- if ( mult > activeArc->multiplicity )
- {
- caseC++;
- }
-
- return mult > activeArc->multiplicity;
+ unsigned int arcRight_n, arcLeft_n;
+ unsigned int bal_ed;
+ unsigned int currentEd = edgeid;
+ int length = 0;
+ unsigned int mult = 0;
+ ARC *arc, *activeArc = NULL, *tempArc;
+
+ if ( edgeid == 0 )
+ {
+ return 0;
+ }
+
+ bal_ed = getTwinEdge ( edgeid );
+
+ if ( bal_ed == edgeid )
+ {
+ return 0;
+ }
+
+ arcCount ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 0 )
+ {
+ return 0;
+ }
+
+ while ( currentEd )
+ {
+ arcCount ( bal_ed, &arcLeft_n );
+ tempArc = arcCount ( currentEd, &arcRight_n );
+
+ if ( arcLeft_n > 1 || arcRight_n > 1 )
+ {
+ if ( arcLeft_n == 0 || length == 0 )
+ {
+ return 0;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ length += edge_array[currentEd].length;
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+
+ if ( tempArc )
+ {
+ activeArc = tempArc;
+ currentEd = activeArc->to_ed;
+ bal_ed = getTwinEdge ( currentEd );
+ }
+ else
+ {
+ currentEd = 0;
+ }
+ }
+
+ if ( currentEd == 0 )
+ {
+ caseA++;
+ return 1;
+ }
+
+ if ( !activeArc )
+ {
+ fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
+ }
+
+ if ( activeArc->multiplicity == 1 )
+ {
+ caseB++;
+ return 1;
+ }
+
+ for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
+ if ( arc->multiplicity > mult )
+ {
+ mult = arc->multiplicity;
+ }
+
+ if ( mult > activeArc->multiplicity )
+ {
+ caseC++;
+ }
+
+ return mult > activeArc->multiplicity;
}
/*************************************************
@@ -487,27 +487,27 @@ Return:
*************************************************/
void removeDeadArcs ()
{
- unsigned int i, count = 0;
- ARC * arc, *arc_temp;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- arc = edge_array[i].arcs;
-
- while ( arc )
- {
- arc_temp = arc;
- arc = arc->next;
-
- if ( arc_temp->to_ed == 0 )
- {
- count++;
- edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
- }
- }
- }
-
- fprintf ( stderr, "%d dead arc(s) removed.\n", count );
+ unsigned int i, count = 0;
+ ARC *arc, *arc_temp;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ arc = edge_array[i].arcs;
+
+ while ( arc )
+ {
+ arc_temp = arc;
+ arc = arc->next;
+
+ if ( arc_temp->to_ed == 0 )
+ {
+ count++;
+ edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
+ }
+ }
+ }
+
+ fprintf ( stderr, "%d dead arc(s) removed.\n", count );
}
/*************************************************
@@ -525,52 +525,52 @@ Return:
*************************************************/
void cutTipsInGraph ( int cutLen, boolean strict, boolean last )
{
- int flag = 1;
- unsigned int i;
-
- if ( !cutLen )
- {
- cutLen = 2 * overlaplen;
- }
-
- fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
-
- if ( strict )
- {
- linearConcatenate ( 0, last );
- }
-
- caseA = caseB = caseC = caseD = caseE = 0;
- int round = 1;
-
- while ( flag )
- {
- flag = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted )
- {
- continue;
- }
-
- if ( isUnreliableTip ( i, cutLen, strict ) )
- {
- destroyEdge ( i );
- flag++;
- }
- }
-
- fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
- }
-
- removeDeadArcs ();
-
- if ( strict )
- {
- fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE );
- }
-
- linearConcatenate ( 0, last );
- compactEdgeArray ();
+ int flag = 1;
+ unsigned int i;
+
+ if ( !cutLen )
+ {
+ cutLen = 2 * overlaplen;
+ }
+
+ fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
+
+ if ( strict )
+ {
+ linearConcatenate ( 0, last );
+ }
+
+ caseA = caseB = caseC = caseD = caseE = 0;
+ int round = 1;
+
+ while ( flag )
+ {
+ flag = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( isUnreliableTip ( i, cutLen, strict ) )
+ {
+ destroyEdge ( i );
+ flag++;
+ }
+ }
+
+ fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
+ }
+
+ removeDeadArcs ();
+
+ if ( strict )
+ {
+ fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE );
+ }
+
+ linearConcatenate ( 0, last );
+ compactEdgeArray ();
}
diff --git a/standardPregraph/cutTip_graph2.c b/standardPregraph/cutTip_graph2.c
index 9ab776c..cb58170 100644
--- a/standardPregraph/cutTip_graph2.c
+++ b/standardPregraph/cutTip_graph2.c
@@ -1,7 +1,7 @@
/*
* cutTip_graph2.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -33,67 +33,69 @@ void removeDeadArcs2();
//Destroy the edge.
void destroyEdge2 ( unsigned int edgeid )
{
- unsigned int bal_ed = getTwinEdge ( edgeid );
- ARC * arc;
-
- if ( bal_ed == edgeid )
- {
- edge_array[edgeid].length = 0;
- return;
- }
-
- arc = edge_array[edgeid].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = 0;
- arc = arc->next;
- }
-
- arc = edge_array[bal_ed].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = 0;
- arc = arc->next;
- }
-
- edge_array[edgeid].arcs = NULL;
- edge_array[bal_ed].arcs = NULL;
- edge_array[edgeid].length = 0;
- edge_array[bal_ed].length = 0;
- edge_array[edgeid].deleted = 1;
- edge_array[bal_ed].deleted = 1;
+ unsigned int bal_ed = getTwinEdge ( edgeid );
+ ARC *arc;
+
+ if ( bal_ed == edgeid )
+ {
+ edge_array[edgeid].length = 0;
+ return;
+ }
+
+ arc = edge_array[edgeid].arcs;
+
+ while ( arc )
+ {
+ arc->bal_arc->to_ed = 0;
+ arc = arc->next;
+ }
+
+ arc = edge_array[bal_ed].arcs;
+
+ while ( arc )
+ {
+ arc->bal_arc->to_ed = 0;
+ arc = arc->next;
+ }
+
+ edge_array[edgeid].arcs = NULL;
+ edge_array[bal_ed].arcs = NULL;
+ edge_array[edgeid].length = 0;
+ edge_array[bal_ed].length = 0;
+ edge_array[edgeid].deleted = 1;
+ edge_array[bal_ed].deleted = 1;
}
//Count the arc number of edge.
-ARC * arcCount2 ( unsigned int edgeid, unsigned int * num )
+ARC *arcCount2 ( unsigned int edgeid, unsigned int *num )
{
- ARC * arc;
- ARC * firstValidArc = NULL;
- unsigned int count = 0;
- arc = edge_array[edgeid].arcs;
-
- while ( arc )
- {
- if ( arc->to_ed > 0 )
- {
- count++;
-
- if ( count == 1 )
- { firstValidArc = arc; }
- else if ( count > 1 )
- {
- *num = count;
- return firstValidArc;
- }
- }
-
- arc = arc->next;
- }
-
- *num = count;
- return firstValidArc;
+ ARC *arc;
+ ARC *firstValidArc = NULL;
+ unsigned int count = 0;
+ arc = edge_array[edgeid].arcs;
+
+ while ( arc )
+ {
+ if ( arc->to_ed > 0 )
+ {
+ count++;
+
+ if ( count == 1 )
+ {
+ firstValidArc = arc;
+ }
+ else if ( count > 1 )
+ {
+ *num = count;
+ return firstValidArc;
+ }
+ }
+
+ arc = arc->next;
+ }
+
+ *num = count;
+ return firstValidArc;
}
/* multiplicity < multiCutoff
@@ -117,50 +119,56 @@ Return:
*************************************************/
void removeWeakEdges2 ( int lenCutoff, unsigned int multiCutoff, int mink )
{
- unsigned int bal_ed;
- unsigned int arcRight_n, arcLeft_n;
- ARC * arcLeft, *arcRight;
- unsigned int i;
- int counter = 0;
- int round = 1;
- fprintf ( stderr, "Start to destroy weak inner edges.\n" );
- counter = 1;
-
- while ( counter )
- {
- counter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted || edge_array[i].length == 0
- || edge_array[i].length > lenCutoff
- || EdSameAsTwin ( i ) || edge_array[i].multi || edge_array[i].cvg == 0 )
- { continue; }
-
- //keep cvg == 1 from original step
- // if(edge_array[i].cvg == 1)
- // continue;
- bal_ed = getTwinEdge ( i );
- arcRight = arcCount2 ( i, &arcRight_n );
-
- if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
- { continue; }
-
- arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
- { continue; }
-
- destroyEdge2 ( i );
- counter++;
- }
-
- fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
- }
-
- removeDeadArcs2();
- // linearConcatenate();
- // compactEdgeArray();
+ unsigned int bal_ed;
+ unsigned int arcRight_n, arcLeft_n;
+ ARC *arcLeft, *arcRight;
+ unsigned int i;
+ int counter = 0;
+ int round = 1;
+ fprintf ( stderr, "Start to destroy weak inner edges.\n" );
+ counter = 1;
+
+ while ( counter )
+ {
+ counter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted || edge_array[i].length == 0
+ || edge_array[i].length > lenCutoff
+ || EdSameAsTwin ( i ) || edge_array[i].multi || edge_array[i].cvg == 0 )
+ {
+ continue;
+ }
+
+ //keep cvg == 1 from original step
+ // if(edge_array[i].cvg == 1)
+ // continue;
+ bal_ed = getTwinEdge ( i );
+ arcRight = arcCount2 ( i, &arcRight_n );
+
+ if ( arcRight_n > 1 || !arcRight || arcRight->multiplicity > multiCutoff )
+ {
+ continue;
+ }
+
+ arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 1 || !arcLeft || arcLeft->multiplicity > multiCutoff )
+ {
+ continue;
+ }
+
+ destroyEdge2 ( i );
+ counter++;
+ }
+
+ fprintf ( stderr, "%d weak inner edge(s) destroyed in cycle %d.\n", counter, round++ );
+ }
+
+ removeDeadArcs2();
+ // linearConcatenate();
+ // compactEdgeArray();
}
/*
@@ -198,38 +206,42 @@ Return:
*************************************************/
void removeLowCovEdges2 ( int lenCutoff, unsigned short covCutoff, int mink, boolean last )
{
- unsigned int bal_ed;
- unsigned int arcRight_n, arcLeft_n;
- ARC * arcLeft, *arcRight;
- unsigned int i;
- int counter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted || edge_array[i].cvg == 0
- || edge_array[i].cvg > covCutoff * 10
- || edge_array[i].length >= lenCutoff
- || EdSameAsTwin ( i ) || edge_array[i].length == 0 || edge_array[i].multi )
- { continue; }
-
- //keep cvg == 1 from original SOAPdenovo step
- // if(edge_array[i].cvg == 1)
- // continue;
- bal_ed = getTwinEdge ( i );
- arcRight = arcCount2 ( i, &arcRight_n );
- arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n < 1 || arcRight_n < 1 )
- { continue; }
-
- destroyEdge2 ( i );
- counter++;
- }
-
- fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
- removeDeadArcs2();
- linearConcatenate2 ( last );
- compactEdgeArray();
+ unsigned int bal_ed;
+ unsigned int arcRight_n, arcLeft_n;
+ ARC *arcLeft, *arcRight;
+ unsigned int i;
+ int counter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted || edge_array[i].cvg == 0
+ || edge_array[i].cvg > covCutoff * 10
+ || edge_array[i].length >= lenCutoff
+ || EdSameAsTwin ( i ) || edge_array[i].length == 0 || edge_array[i].multi )
+ {
+ continue;
+ }
+
+ //keep cvg == 1 from original SOAPdenovo step
+ // if(edge_array[i].cvg == 1)
+ // continue;
+ bal_ed = getTwinEdge ( i );
+ arcRight = arcCount2 ( i, &arcRight_n );
+ arcLeft = arcCount2 ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n < 1 || arcRight_n < 1 )
+ {
+ continue;
+ }
+
+ destroyEdge2 ( i );
+ counter++;
+ }
+
+ fprintf ( stderr, "%d inner edge(s) with coverage lower than or equal to %d destroyed.\n", counter, covCutoff );
+ removeDeadArcs2();
+ linearConcatenate2 ( last );
+ compactEdgeArray();
}
/*************************************************
@@ -248,196 +260,236 @@ Return:
*************************************************/
boolean isUnreliableTip2 ( unsigned int edgeid, int cutLen, boolean strict )
{
- unsigned int arcRight_n, arcLeft_n;
- unsigned int bal_ed;
- unsigned int currentEd = edgeid;
- int length = 0;
- unsigned int mult = 0;
- ARC * arc, *activeArc = NULL, *tempArc;
- unsigned int prevEd = currentEd;
-
- if ( edgeid == 0 )
- { return 0; }
-
- bal_ed = getTwinEdge ( edgeid );
-
- if ( bal_ed == edgeid )
- { return 0; }
-
- arcCount2 ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 0 )
- { return 0; }
-
- while ( currentEd )
- {
- prevEd = currentEd;
- arcCount2 ( bal_ed, &arcLeft_n );
- tempArc = arcCount2 ( currentEd, &arcRight_n );
-
- if ( arcLeft_n > 1 || arcRight_n > 1 )
- { break; }
-
- length += edge_array[currentEd].length;
-
- if ( tempArc )
- {
- activeArc = tempArc;
- currentEd = activeArc->to_ed;
- bal_ed = getTwinEdge ( currentEd );
- }
- else
- { currentEd = 0; }
- }
-
- if ( length >= cutLen )
- {
- return 0;
- }
-
- if ( currentEd == 0 )
- {
- caseB++;
- return 0;
- }
-
- if ( !strict )
- {
- if ( arcLeft_n < 2 )
- { length += edge_array[currentEd].length; }
-
- if ( length >= cutLen )
- { return 0; }
- else
- {
- caseC++;
- return 1;
- }
- }
-
- if ( arcLeft_n < 2 )
- {
- return 0;
- }
-
- if ( !activeArc )
- { fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid ); }
-
- if ( activeArc->multiplicity == 1 )
- {
- caseD++;
- return 1;
- }
-
- for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
- if ( arc->multiplicity > mult )
- { mult = arc->multiplicity; }
-
- if ( mult > activeArc->multiplicity )
- { caseE++; }
-
- return mult > activeArc->multiplicity;
+ unsigned int arcRight_n, arcLeft_n;
+ unsigned int bal_ed;
+ unsigned int currentEd = edgeid;
+ int length = 0;
+ unsigned int mult = 0;
+ ARC *arc, *activeArc = NULL, *tempArc;
+ unsigned int prevEd = currentEd;
+
+ if ( edgeid == 0 )
+ {
+ return 0;
+ }
+
+ bal_ed = getTwinEdge ( edgeid );
+
+ if ( bal_ed == edgeid )
+ {
+ return 0;
+ }
+
+ arcCount2 ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 0 )
+ {
+ return 0;
+ }
+
+ while ( currentEd )
+ {
+ prevEd = currentEd;
+ arcCount2 ( bal_ed, &arcLeft_n );
+ tempArc = arcCount2 ( currentEd, &arcRight_n );
+
+ if ( arcLeft_n > 1 || arcRight_n > 1 )
+ {
+ break;
+ }
+
+ length += edge_array[currentEd].length;
+
+ if ( tempArc )
+ {
+ activeArc = tempArc;
+ currentEd = activeArc->to_ed;
+ bal_ed = getTwinEdge ( currentEd );
+ }
+ else
+ {
+ currentEd = 0;
+ }
+ }
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+
+ if ( currentEd == 0 )
+ {
+ caseB++;
+ return 0;
+ }
+
+ if ( !strict )
+ {
+ if ( arcLeft_n < 2 )
+ {
+ length += edge_array[currentEd].length;
+ }
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+ else
+ {
+ caseC++;
+ return 1;
+ }
+ }
+
+ if ( arcLeft_n < 2 )
+ {
+ return 0;
+ }
+
+ if ( !activeArc )
+ {
+ fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
+ }
+
+ if ( activeArc->multiplicity == 1 )
+ {
+ caseD++;
+ return 1;
+ }
+
+ for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
+ if ( arc->multiplicity > mult )
+ {
+ mult = arc->multiplicity;
+ }
+
+ if ( mult > activeArc->multiplicity )
+ {
+ caseE++;
+ }
+
+ return mult > activeArc->multiplicity;
}
boolean isUnreliableTip_strict2 ( unsigned int edgeid, int cutLen )
{
- unsigned int arcRight_n, arcLeft_n;
- unsigned int bal_ed;
- unsigned int currentEd = edgeid;
- int length = 0;
- unsigned int mult = 0;
- ARC * arc, *activeArc = NULL, *tempArc;
-
- if ( edgeid == 0 )
- { return 0; }
-
- bal_ed = getTwinEdge ( edgeid );
-
- if ( bal_ed == edgeid )
- { return 0; }
-
- arcCount2 ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n > 0 )
- { return 0; }
-
- while ( currentEd )
- {
- arcCount2 ( bal_ed, &arcLeft_n );
- tempArc = arcCount2 ( currentEd, &arcRight_n );
-
- if ( arcLeft_n > 1 || arcRight_n > 1 )
- {
- if ( arcLeft_n == 0 || length == 0 )
- { return 0; }
- else
- { break; }
- }
-
- length += edge_array[currentEd].length;
-
- if ( length >= cutLen )
- { return 0; }
-
- if ( tempArc )
- {
- activeArc = tempArc;
- currentEd = activeArc->to_ed;
- bal_ed = getTwinEdge ( currentEd );
- }
- else
- { currentEd = 0; }
- }
-
- if ( currentEd == 0 )
- {
- caseA++;
- return 1;
- }
-
- if ( !activeArc )
- { fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid ); }
-
- if ( activeArc->multiplicity == 1 )
- {
- caseB++;
- return 1;
- }
-
- for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
- if ( arc->multiplicity > mult )
- { mult = arc->multiplicity; }
-
- if ( mult > activeArc->multiplicity )
- { caseC++; }
-
- return mult > activeArc->multiplicity;
+ unsigned int arcRight_n, arcLeft_n;
+ unsigned int bal_ed;
+ unsigned int currentEd = edgeid;
+ int length = 0;
+ unsigned int mult = 0;
+ ARC *arc, *activeArc = NULL, *tempArc;
+
+ if ( edgeid == 0 )
+ {
+ return 0;
+ }
+
+ bal_ed = getTwinEdge ( edgeid );
+
+ if ( bal_ed == edgeid )
+ {
+ return 0;
+ }
+
+ arcCount2 ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n > 0 )
+ {
+ return 0;
+ }
+
+ while ( currentEd )
+ {
+ arcCount2 ( bal_ed, &arcLeft_n );
+ tempArc = arcCount2 ( currentEd, &arcRight_n );
+
+ if ( arcLeft_n > 1 || arcRight_n > 1 )
+ {
+ if ( arcLeft_n == 0 || length == 0 )
+ {
+ return 0;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ length += edge_array[currentEd].length;
+
+ if ( length >= cutLen )
+ {
+ return 0;
+ }
+
+ if ( tempArc )
+ {
+ activeArc = tempArc;
+ currentEd = activeArc->to_ed;
+ bal_ed = getTwinEdge ( currentEd );
+ }
+ else
+ {
+ currentEd = 0;
+ }
+ }
+
+ if ( currentEd == 0 )
+ {
+ caseA++;
+ return 1;
+ }
+
+ if ( !activeArc )
+ {
+ fprintf ( stderr, "No activeArc while checking edge %d.\n", edgeid );
+ }
+
+ if ( activeArc->multiplicity == 1 )
+ {
+ caseB++;
+ return 1;
+ }
+
+ for ( arc = edge_array[bal_ed].arcs; arc != NULL; arc = arc->next )
+ if ( arc->multiplicity > mult )
+ {
+ mult = arc->multiplicity;
+ }
+
+ if ( mult > activeArc->multiplicity )
+ {
+ caseC++;
+ }
+
+ return mult > activeArc->multiplicity;
}
//Remove the arcs that set to 0.
void removeDeadArcs2()
{
- unsigned int i, count = 0;
- ARC * arc, *arc_temp;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- arc = edge_array[i].arcs;
-
- while ( arc )
- {
- arc_temp = arc;
- arc = arc->next;
-
- if ( arc_temp->to_ed == 0 )
- {
- count++;
- edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
- }
- }
- }
-
- fprintf ( stderr, "%d dead arc(s) removed.\n", count );
+ unsigned int i, count = 0;
+ ARC *arc, *arc_temp;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ arc = edge_array[i].arcs;
+
+ while ( arc )
+ {
+ arc_temp = arc;
+ arc = arc->next;
+
+ if ( arc_temp->to_ed == 0 )
+ {
+ count++;
+ edge_array[i].arcs = deleteArc ( edge_array[i].arcs, arc_temp );
+ }
+ }
+ }
+
+ fprintf ( stderr, "%d dead arc(s) removed.\n", count );
}
/*************************************************
@@ -456,45 +508,53 @@ Return:
*************************************************/
void cutTipsInGraph2 ( int cutLen, boolean strict, boolean last )
{
- int flag = 1;
- unsigned int i;
-
- if ( !cutLen )
- { cutLen = 2 * overlaplen; }
-
- fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
-
- if ( strict )
- { linearConcatenate2 ( last ); }
-
- caseA = caseB = caseC = caseD = caseE = 0;
- int round = 1;
-
- while ( flag )
- {
- flag = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted )
- { continue; }
-
- if ( !edge_array[i].multi && isUnreliableTip2 ( i, cutLen, strict ) )
- {
- destroyEdge2 ( i );
- flag++;
- }
- }
-
- fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
- }
-
- removeDeadArcs2();
-
- if ( strict )
- { fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE ); }
-
- linearConcatenate2 ( last );
- compactEdgeArray();
+ int flag = 1;
+ unsigned int i;
+
+ if ( !cutLen )
+ {
+ cutLen = 2 * overlaplen;
+ }
+
+ fprintf ( stderr, "\nStrict: %d, cutoff length: %d.\n", strict, cutLen );
+
+ if ( strict )
+ {
+ linearConcatenate2 ( last );
+ }
+
+ caseA = caseB = caseC = caseD = caseE = 0;
+ int round = 1;
+
+ while ( flag )
+ {
+ flag = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( !edge_array[i].multi && isUnreliableTip2 ( i, cutLen, strict ) )
+ {
+ destroyEdge2 ( i );
+ flag++;
+ }
+ }
+
+ fprintf ( stderr, "%d tips cut in cycle %d.\n", flag, round++ );
+ }
+
+ removeDeadArcs2();
+
+ if ( strict )
+ {
+ fprintf ( stderr, "Case A %d, B %d C %d D %d E %d.\n", caseA, caseB, caseC, caseD, caseE );
+ }
+
+ linearConcatenate2 ( last );
+ compactEdgeArray();
}
diff --git a/standardPregraph/darray.c b/standardPregraph/darray.c
index 66ca668..5b8f443 100644
--- a/standardPregraph/darray.c
+++ b/standardPregraph/darray.c
@@ -1,7 +1,7 @@
/*
* darray.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -23,67 +23,67 @@
#include "darray.h"
#include "check.h"
-DARRAY * createDarray ( int num_items, size_t unit_size )
+DARRAY *createDarray ( int num_items, size_t unit_size )
{
- DARRAY * newDarray = ( DARRAY * ) malloc ( 1 * sizeof ( DARRAY ) );
- newDarray->array_size = num_items;
- newDarray->item_size = unit_size;
- newDarray->item_c = 0;
- newDarray->array = ( void * ) ckalloc ( num_items * unit_size );
- return newDarray;
+ DARRAY *newDarray = ( DARRAY * ) malloc ( 1 * sizeof ( DARRAY ) );
+ newDarray->array_size = num_items;
+ newDarray->item_size = unit_size;
+ newDarray->item_c = 0;
+ newDarray->array = ( void * ) ckalloc ( num_items * unit_size );
+ return newDarray;
}
-void * darrayPut ( DARRAY * darray, long long index )
+void *darrayPut ( DARRAY *darray, long long index )
{
- int i = 2;
+ int i = 2;
- if ( index + 1 > darray->item_c )
- {
- darray->item_c = index + 1;
- }
+ if ( index + 1 > darray->item_c )
+ {
+ darray->item_c = index + 1;
+ }
- if ( index < darray->array_size )
- {
- return darray->array + darray->item_size * index;
- }
+ if ( index < darray->array_size )
+ {
+ return darray->array + darray->item_size * index;
+ }
- while ( index > i * darray->array_size )
- {
- i++;
- }
+ while ( index > i * darray->array_size )
+ {
+ i++;
+ }
- darray->array = ( void * ) ckrealloc ( darray->array, i * darray->array_size * darray->item_size, darray->array_size * darray->item_size );
- darray->array_size *= i;
- return ( void * ) ( ( void * ) darray->array + darray->item_size * index );
+ darray->array = ( void * ) ckrealloc ( darray->array, i * darray->array_size * darray->item_size, darray->array_size * darray->item_size );
+ darray->array_size *= i;
+ return ( void * ) ( ( void * ) darray->array + darray->item_size * index );
}
-void * darrayGet ( DARRAY * darray, long long index )
+void *darrayGet ( DARRAY *darray, long long index )
{
- if ( index < darray->array_size )
- {
- return ( void * ) ( ( void * ) darray->array + darray->item_size * index );
- }
+ if ( index < darray->array_size )
+ {
+ return ( void * ) ( ( void * ) darray->array + darray->item_size * index );
+ }
- fprintf ( stderr, "Index %lld of the array is out of range and the size is %lld.\n", index, darray->array_size );
- return NULL;
+ fprintf ( stderr, "Index %lld of the array is out of range and the size is %lld.\n", index, darray->array_size );
+ return NULL;
}
-void emptyDarray ( DARRAY * darray )
+void emptyDarray ( DARRAY *darray )
{
- darray->item_c = 0;
+ darray->item_c = 0;
}
-void freeDarray ( DARRAY * darray )
+void freeDarray ( DARRAY *darray )
{
- if ( !darray )
- {
- return;
- }
+ if ( !darray )
+ {
+ return;
+ }
- if ( darray->array )
- {
- free ( ( void * ) darray->array );
- }
+ if ( darray->array )
+ {
+ free ( ( void * ) darray->array );
+ }
- free ( ( void * ) darray );
+ free ( ( void * ) darray );
}
diff --git a/standardPregraph/dfib.c b/standardPregraph/dfib.c
index 64f7c5c..f28fe72 100644
--- a/standardPregraph/dfib.c
+++ b/standardPregraph/dfib.c
@@ -41,207 +41,207 @@
#include "extfunc2.h"
#define HEAPBLOCKSIZE 1000
-static int dfh_comparedata ( DFibHeap * h, Time key, unsigned int data, DFibHeapNode * b );
-static DFibHeapNode * allocateDFibHeapNode ( DFibHeap * heap )
+static int dfh_comparedata ( DFibHeap *h, Time key, unsigned int data, DFibHeapNode *b );
+static DFibHeapNode *allocateDFibHeapNode ( DFibHeap *heap )
{
- return ( DFibHeapNode * ) getItem ( heap->nodeMemory );
+ return ( DFibHeapNode * ) getItem ( heap->nodeMemory );
};
-static void deallocateDFibHeapNode ( DFibHeapNode * a, DFibHeap * heap )
+static void deallocateDFibHeapNode ( DFibHeapNode *a, DFibHeap *heap )
{
- returnItem ( heap->nodeMemory, a );
+ returnItem ( heap->nodeMemory, a );
}
-IDnum dfibheap_getSize ( DFibHeap * heap )
+IDnum dfibheap_getSize ( DFibHeap *heap )
{
- return heap->dfh_n;
+ return heap->dfh_n;
}
#define swap(type, a, b) \
- do { \
- type c; \
- c = a; \
- a = b; \
- b = c; \
- } while (0) \
-
+ do { \
+ type c; \
+ c = a; \
+ a = b; \
+ b = c; \
+ } while (0) \
+
#define INT_BITS (sizeof(IDnum) * 8)
static inline IDnum ceillog2 ( IDnum a )
{
- IDnum oa;
- IDnum i;
- IDnum b;
- IDnum cons;
- oa = a;
- b = INT_BITS / 2;
- i = 0;
-
- while ( b )
- {
- i = ( i << 1 );
- cons = ( ( IDnum ) 1 ) << b;
-
- if ( a >= cons )
- {
- a /= cons;
- i = i | 1;
- }
- else
- {
- a &= cons - 1;
- }
-
- b /= 2;
- }
-
- if ( ( ( ( IDnum ) 1 << i ) ) == oa )
- {
- return i;
- }
- else
- {
- return i + 1;
- }
+ IDnum oa;
+ IDnum i;
+ IDnum b;
+ IDnum cons;
+ oa = a;
+ b = INT_BITS / 2;
+ i = 0;
+
+ while ( b )
+ {
+ i = ( i << 1 );
+ cons = ( ( IDnum ) 1 ) << b;
+
+ if ( a >= cons )
+ {
+ a /= cons;
+ i = i | 1;
+ }
+ else
+ {
+ a &= cons - 1;
+ }
+
+ b /= 2;
+ }
+
+ if ( ( ( ( IDnum ) 1 << i ) ) == oa )
+ {
+ return i;
+ }
+ else
+ {
+ return i + 1;
+ }
}
/*
* Public Heap Functions
*/
-DFibHeap * dfh_makekeyheap ()
+DFibHeap *dfh_makekeyheap ()
{
- DFibHeap * n;
-
- if ( ( n = malloc ( sizeof * n ) ) == NULL )
- {
- return NULL;
- }
-
- n->nodeMemory = createMem_manager ( HEAPBLOCKSIZE, sizeof ( DFibHeapNode ) );
- n->dfh_n = 0;
- n->dfh_Dl = -1;
- n->dfh_cons = NULL;
- n->dfh_min = NULL;
- n->dfh_root = NULL;
- return n;
+ DFibHeap *n;
+
+ if ( ( n = malloc ( sizeof * n ) ) == NULL )
+ {
+ return NULL;
+ }
+
+ n->nodeMemory = createMem_manager ( HEAPBLOCKSIZE, sizeof ( DFibHeapNode ) );
+ n->dfh_n = 0;
+ n->dfh_Dl = -1;
+ n->dfh_cons = NULL;
+ n->dfh_min = NULL;
+ n->dfh_root = NULL;
+ return n;
}
-void dfh_deleteheap ( DFibHeap * h )
+void dfh_deleteheap ( DFibHeap *h )
{
- fprintf ( stderr, "DFibHeap: %lld node(s) allocated.\n", h->nodeMemory->counter );
- freeMem_manager ( h->nodeMemory );
- h->nodeMemory = NULL;
+ fprintf ( stderr, "DFibHeap: %lld node(s) allocated.\n", h->nodeMemory->counter );
+ freeMem_manager ( h->nodeMemory );
+ h->nodeMemory = NULL;
- if ( h->dfh_cons != NULL )
- {
- free ( h->dfh_cons );
- }
+ if ( h->dfh_cons != NULL )
+ {
+ free ( h->dfh_cons );
+ }
- free ( h );
+ free ( h );
}
/*
* Public Key Heap Functions
*/
-DFibHeapNode * dfh_insertkey ( DFibHeap * h, Time key, unsigned int data )
+DFibHeapNode *dfh_insertkey ( DFibHeap *h, Time key, unsigned int data )
{
- DFibHeapNode * x;
-
- if ( ( x = dfhe_newelem ( h ) ) == NULL )
- {
- return NULL;
- }
-
- /* just insert on root list, and make sure it's not the new min */
- x->dfhe_data = data;
- x->dfhe_key = key;
- dfh_insertel ( h, x );
- return x;
+ DFibHeapNode *x;
+
+ if ( ( x = dfhe_newelem ( h ) ) == NULL )
+ {
+ return NULL;
+ }
+
+ /* just insert on root list, and make sure it's not the new min */
+ x->dfhe_data = data;
+ x->dfhe_key = key;
+ dfh_insertel ( h, x );
+ return x;
}
-Time dfh_replacekey ( DFibHeap * h, DFibHeapNode * x, Time key )
+Time dfh_replacekey ( DFibHeap *h, DFibHeapNode *x, Time key )
{
- Time ret;
- ret = x->dfhe_key;
- ( void ) dfh_replacekeydata ( h, x, key, x->dfhe_data );
- return ret;
+ Time ret;
+ ret = x->dfhe_key;
+ ( void ) dfh_replacekeydata ( h, x, key, x->dfhe_data );
+ return ret;
}
-unsigned int minInDHeap ( DFibHeap * h )
+unsigned int minInDHeap ( DFibHeap *h )
{
- if ( h->dfh_min )
- {
- return h->dfh_min->dfhe_data;
- }
- else
- {
- return 0;
- }
+ if ( h->dfh_min )
+ {
+ return h->dfh_min->dfhe_data;
+ }
+ else
+ {
+ return 0;
+ }
}
-boolean HasMin ( DFibHeap * h )
+boolean HasMin ( DFibHeap *h )
{
- if ( h->dfh_min )
- {
- return 1;
- }
- else
- {
- return 0;
- }
+ if ( h->dfh_min )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
-unsigned int dfh_replacekeydata ( DFibHeap * h, DFibHeapNode * x, Time key, unsigned int data )
+unsigned int dfh_replacekeydata ( DFibHeap *h, DFibHeapNode *x, Time key, unsigned int data )
{
- unsigned int odata;
- Time okey;
- DFibHeapNode * y;
- int r;
- odata = x->dfhe_data;
- okey = x->dfhe_key;
-
- /*
- * we can increase a key by deleting and reinserting, that
- * requires O(lgn) time.
- */
- if ( ( r = dfh_comparedata ( h, key, data, x ) ) > 0 )
- {
- /* XXX - bad code! */
- abort ();
- }
-
- x->dfhe_data = data;
- x->dfhe_key = key;
-
- /* because they are equal, we don't have to do anything */
- if ( r == 0 )
- {
- return odata;
- }
-
- y = x->dfhe_p;
-
- if ( okey == key )
- {
- return odata;
- }
-
- if ( y != NULL && dfh_compare ( h, x, y ) <= 0 )
- {
- dfh_cut ( h, x, y );
- dfh_cascading_cut ( h, y );
- }
-
- /*
- * the = is so that the call from dfh_delete will delete the proper
- * element.
- */
- if ( dfh_compare ( h, x, h->dfh_min ) <= 0 )
- {
- h->dfh_min = x;
- }
-
- return odata;
+ unsigned int odata;
+ Time okey;
+ DFibHeapNode *y;
+ int r;
+ odata = x->dfhe_data;
+ okey = x->dfhe_key;
+
+ /*
+ * we can increase a key by deleting and reinserting, that
+ * requires O(lgn) time.
+ */
+ if ( ( r = dfh_comparedata ( h, key, data, x ) ) > 0 )
+ {
+ /* XXX - bad code! */
+ abort ();
+ }
+
+ x->dfhe_data = data;
+ x->dfhe_key = key;
+
+ /* because they are equal, we don't have to do anything */
+ if ( r == 0 )
+ {
+ return odata;
+ }
+
+ y = x->dfhe_p;
+
+ if ( okey == key )
+ {
+ return odata;
+ }
+
+ if ( y != NULL && dfh_compare ( h, x, y ) <= 0 )
+ {
+ dfh_cut ( h, x, y );
+ dfh_cascading_cut ( h, y );
+ }
+
+ /*
+ * the = is so that the call from dfh_delete will delete the proper
+ * element.
+ */
+ if ( dfh_compare ( h, x, h->dfh_min ) <= 0 )
+ {
+ h->dfh_min = x;
+ }
+
+ return odata;
}
/*
@@ -252,343 +252,345 @@ unsigned int dfh_replacekeydata ( DFibHeap * h, DFibHeapNode * x, Time key, unsi
* NULL failed for some reason
* ptr token to use for manipulation of data
*/
-unsigned int dfh_extractmin ( DFibHeap * h )
+unsigned int dfh_extractmin ( DFibHeap *h )
{
- DFibHeapNode * z;
- unsigned int ret;
- ret = 0;
-
- if ( h->dfh_min != NULL )
- {
- z = dfh_extractminel ( h );
- ret = z->dfhe_data;
- deallocateDFibHeapNode ( z, h );
- }
-
- return ret;
+ DFibHeapNode *z;
+ unsigned int ret;
+ ret = 0;
+
+ if ( h->dfh_min != NULL )
+ {
+ z = dfh_extractminel ( h );
+ ret = z->dfhe_data;
+ deallocateDFibHeapNode ( z, h );
+ }
+
+ return ret;
}
-unsigned int dfh_replacedata ( DFibHeapNode * x, unsigned int data )
+unsigned int dfh_replacedata ( DFibHeapNode *x, unsigned int data )
{
- unsigned int odata = x->dfhe_data;
- //printf("replace node value %d with %d\n",x->dfhe_data,data);
- x->dfhe_data = data;
- return odata;
+ unsigned int odata = x->dfhe_data;
+ //printf("replace node value %d with %d\n",x->dfhe_data,data);
+ x->dfhe_data = data;
+ return odata;
}
-unsigned int dfh_delete ( DFibHeap * h, DFibHeapNode * x )
+unsigned int dfh_delete ( DFibHeap *h, DFibHeapNode *x )
{
- unsigned int k;
- //printf("destroy node %d in dheap\n",x->dfhe_data);
- k = x->dfhe_data;
- dfh_replacekey ( h, x, INT_MIN );
- dfh_extractmin ( h );
- return k;
+ unsigned int k;
+ //printf("destroy node %d in dheap\n",x->dfhe_data);
+ k = x->dfhe_data;
+ dfh_replacekey ( h, x, INT_MIN );
+ dfh_extractmin ( h );
+ return k;
}
/*
* begin of private element fuctions
*/
-static DFibHeapNode * dfh_extractminel ( DFibHeap * h )
+static DFibHeapNode *dfh_extractminel ( DFibHeap *h )
{
- DFibHeapNode * ret;
- DFibHeapNode * x, *y, *orig;
- ret = h->dfh_min;
- orig = NULL;
-
- /* put all the children on the root list */
- /* for true consistancy, we should use dfhe_remove */
- for ( x = ret->dfhe_child; x != orig && x != NULL; )
- {
- if ( orig == NULL )
- {
- orig = x;
- }
-
- y = x->dfhe_right;
- x->dfhe_p = NULL;
- dfh_insertrootlist ( h, x );
- x = y;
- }
-
- /* remove minimum from root list */
- dfh_removerootlist ( h, ret );
- h->dfh_n--;
-
- /* if we aren't empty, consolidate the heap */
- if ( h->dfh_n == 0 )
- {
- h->dfh_min = NULL;
- }
- else
- {
- h->dfh_min = ret->dfhe_right;
- dfh_consolidate ( h );
- }
-
- return ret;
+ DFibHeapNode *ret;
+ DFibHeapNode *x, *y, *orig;
+ ret = h->dfh_min;
+ orig = NULL;
+
+ /* put all the children on the root list */
+ /* for true consistancy, we should use dfhe_remove */
+ for ( x = ret->dfhe_child; x != orig && x != NULL; )
+ {
+ if ( orig == NULL )
+ {
+ orig = x;
+ }
+
+ y = x->dfhe_right;
+ x->dfhe_p = NULL;
+ dfh_insertrootlist ( h, x );
+ x = y;
+ }
+
+ /* remove minimum from root list */
+ dfh_removerootlist ( h, ret );
+ h->dfh_n--;
+
+ /* if we aren't empty, consolidate the heap */
+ if ( h->dfh_n == 0 )
+ {
+ h->dfh_min = NULL;
+ }
+ else
+ {
+ h->dfh_min = ret->dfhe_right;
+ dfh_consolidate ( h );
+ }
+
+ return ret;
}
-static void dfh_insertrootlist ( DFibHeap * h, DFibHeapNode * x )
+static void dfh_insertrootlist ( DFibHeap *h, DFibHeapNode *x )
{
- if ( h->dfh_root == NULL )
- {
- h->dfh_root = x;
- x->dfhe_left = x;
- x->dfhe_right = x;
- return;
- }
-
- dfhe_insertafter ( h->dfh_root, x );
+ if ( h->dfh_root == NULL )
+ {
+ h->dfh_root = x;
+ x->dfhe_left = x;
+ x->dfhe_right = x;
+ return;
+ }
+
+ dfhe_insertafter ( h->dfh_root, x );
}
-static void dfh_removerootlist ( DFibHeap * h, DFibHeapNode * x )
+static void dfh_removerootlist ( DFibHeap *h, DFibHeapNode *x )
{
- if ( x->dfhe_left == x )
- {
- h->dfh_root = NULL;
- }
- else
- {
- h->dfh_root = dfhe_remove ( x );
- }
+ if ( x->dfhe_left == x )
+ {
+ h->dfh_root = NULL;
+ }
+ else
+ {
+ h->dfh_root = dfhe_remove ( x );
+ }
}
-static void dfh_consolidate ( DFibHeap * h )
+static void dfh_consolidate ( DFibHeap *h )
{
- DFibHeapNode ** a;
- DFibHeapNode * w;
- DFibHeapNode * y;
- DFibHeapNode * x;
- IDnum i;
- IDnum d;
- IDnum D;
- dfh_checkcons ( h );
- /* assign a the value of h->dfh_cons so I don't have to rewrite code */
- D = h->dfh_Dl + 1;
- a = h->dfh_cons;
-
- for ( i = 0; i < D; i++ )
- {
- a[i] = NULL;
- }
-
- while ( ( w = h->dfh_root ) != NULL )
- {
- x = w;
- dfh_removerootlist ( h, w );
- d = x->dfhe_degree;
-
- /* XXX - assert that d < D */
- while ( a[d] != NULL )
- {
- y = a[d];
-
- if ( dfh_compare ( h, x, y ) > 0 )
- {
- swap ( DFibHeapNode *, x, y );
- }
-
- dfh_heaplink ( h, y, x );
- a[d] = NULL;
- d++;
- }
-
- a[d] = x;
- }
-
- h->dfh_min = NULL;
-
- for ( i = 0; i < D; i++ )
- if ( a[i] != NULL )
- {
- dfh_insertrootlist ( h, a[i] );
-
- if ( h->dfh_min == NULL || dfh_compare ( h, a[i], h->dfh_min ) < 0 )
- {
- h->dfh_min = a[i];
- }
- }
+ DFibHeapNode **a;
+ DFibHeapNode *w;
+ DFibHeapNode *y;
+ DFibHeapNode *x;
+ IDnum i;
+ IDnum d;
+ IDnum D;
+ dfh_checkcons ( h );
+ /* assign a the value of h->dfh_cons so I don't have to rewrite code */
+ D = h->dfh_Dl + 1;
+ a = h->dfh_cons;
+
+ for ( i = 0; i < D; i++ )
+ {
+ a[i] = NULL;
+ }
+
+ while ( ( w = h->dfh_root ) != NULL )
+ {
+ x = w;
+ dfh_removerootlist ( h, w );
+ d = x->dfhe_degree;
+
+ /* XXX - assert that d < D */
+ while ( a[d] != NULL )
+ {
+ y = a[d];
+
+ if ( dfh_compare ( h, x, y ) > 0 )
+ {
+ swap ( DFibHeapNode *, x, y );
+ }
+
+ dfh_heaplink ( h, y, x );
+ a[d] = NULL;
+ d++;
+ }
+
+ a[d] = x;
+ }
+
+ h->dfh_min = NULL;
+
+ for ( i = 0; i < D; i++ )
+ if ( a[i] != NULL )
+ {
+ dfh_insertrootlist ( h, a[i] );
+
+ if ( h->dfh_min == NULL || dfh_compare ( h, a[i], h->dfh_min ) < 0 )
+ {
+ h->dfh_min = a[i];
+ }
+ }
}
-static void dfh_heaplink ( DFibHeap * h, DFibHeapNode * y, DFibHeapNode * x )
+static void dfh_heaplink ( DFibHeap *h, DFibHeapNode *y, DFibHeapNode *x )
{
- /* make y a child of x */
- if ( x->dfhe_child == NULL )
- {
- x->dfhe_child = y;
- }
- else
- {
- dfhe_insertbefore ( x->dfhe_child, y );
- }
-
- y->dfhe_p = x;
- x->dfhe_degree++;
- y->dfhe_mark = 0;
+ /* make y a child of x */
+ if ( x->dfhe_child == NULL )
+ {
+ x->dfhe_child = y;
+ }
+ else
+ {
+ dfhe_insertbefore ( x->dfhe_child, y );
+ }
+
+ y->dfhe_p = x;
+ x->dfhe_degree++;
+ y->dfhe_mark = 0;
}
-static void dfh_cut ( DFibHeap * h, DFibHeapNode * x, DFibHeapNode * y )
+static void dfh_cut ( DFibHeap *h, DFibHeapNode *x, DFibHeapNode *y )
{
- dfhe_remove ( x );
- y->dfhe_degree--;
- dfh_insertrootlist ( h, x );
- x->dfhe_p = NULL;
- x->dfhe_mark = 0;
+ dfhe_remove ( x );
+ y->dfhe_degree--;
+ dfh_insertrootlist ( h, x );
+ x->dfhe_p = NULL;
+ x->dfhe_mark = 0;
}
-static void dfh_cascading_cut ( DFibHeap * h, DFibHeapNode * y )
+static void dfh_cascading_cut ( DFibHeap *h, DFibHeapNode *y )
{
- DFibHeapNode * z;
-
- while ( ( z = y->dfhe_p ) != NULL )
- {
- if ( y->dfhe_mark == 0 )
- {
- y->dfhe_mark = 1;
- return;
- }
- else
- {
- dfh_cut ( h, y, z );
- y = z;
- }
- }
+ DFibHeapNode *z;
+
+ while ( ( z = y->dfhe_p ) != NULL )
+ {
+ if ( y->dfhe_mark == 0 )
+ {
+ y->dfhe_mark = 1;
+ return;
+ }
+ else
+ {
+ dfh_cut ( h, y, z );
+ y = z;
+ }
+ }
}
/*
* begining of handling elements of dfibheap
*/
-static DFibHeapNode * dfhe_newelem ( DFibHeap * h )
+static DFibHeapNode *dfhe_newelem ( DFibHeap *h )
{
- DFibHeapNode * e;
-
- if ( ( e = allocateDFibHeapNode ( h ) ) == NULL )
- {
- return NULL;
- }
-
- e->dfhe_degree = 0;
- e->dfhe_mark = 0;
- e->dfhe_p = NULL;
- e->dfhe_child = NULL;
- e->dfhe_left = e;
- e->dfhe_right = e;
- e->dfhe_data = 0;
- return e;
+ DFibHeapNode *e;
+
+ if ( ( e = allocateDFibHeapNode ( h ) ) == NULL )
+ {
+ return NULL;
+ }
+
+ e->dfhe_degree = 0;
+ e->dfhe_mark = 0;
+ e->dfhe_p = NULL;
+ e->dfhe_child = NULL;
+ e->dfhe_left = e;
+ e->dfhe_right = e;
+ e->dfhe_data = 0;
+ return e;
}
-static void dfhe_insertafter ( DFibHeapNode * a, DFibHeapNode * b )
+static void dfhe_insertafter ( DFibHeapNode *a, DFibHeapNode *b )
{
- if ( a == a->dfhe_right )
- {
- a->dfhe_right = b;
- a->dfhe_left = b;
- b->dfhe_right = a;
- b->dfhe_left = a;
- }
- else
- {
- b->dfhe_right = a->dfhe_right;
- a->dfhe_right->dfhe_left = b;
- a->dfhe_right = b;
- b->dfhe_left = a;
- }
+ if ( a == a->dfhe_right )
+ {
+ a->dfhe_right = b;
+ a->dfhe_left = b;
+ b->dfhe_right = a;
+ b->dfhe_left = a;
+ }
+ else
+ {
+ b->dfhe_right = a->dfhe_right;
+ a->dfhe_right->dfhe_left = b;
+ a->dfhe_right = b;
+ b->dfhe_left = a;
+ }
}
-static inline void dfhe_insertbefore ( DFibHeapNode * a, DFibHeapNode * b )
+static inline void dfhe_insertbefore ( DFibHeapNode *a, DFibHeapNode *b )
{
- dfhe_insertafter ( a->dfhe_left, b );
+ dfhe_insertafter ( a->dfhe_left, b );
}
-static DFibHeapNode * dfhe_remove ( DFibHeapNode * x )
+static DFibHeapNode *dfhe_remove ( DFibHeapNode *x )
{
- DFibHeapNode * ret;
-
- if ( x == x->dfhe_left )
- {
- ret = NULL;
- }
- else
- {
- ret = x->dfhe_left;
- }
-
- /* fix the parent pointer */
- if ( x->dfhe_p != NULL && x->dfhe_p->dfhe_child == x )
- {
- x->dfhe_p->dfhe_child = ret;
- }
-
- x->dfhe_right->dfhe_left = x->dfhe_left;
- x->dfhe_left->dfhe_right = x->dfhe_right;
- /* clear out hanging pointers */
- x->dfhe_p = NULL;
- x->dfhe_left = x;
- x->dfhe_right = x;
- return ret;
+ DFibHeapNode *ret;
+
+ if ( x == x->dfhe_left )
+ {
+ ret = NULL;
+ }
+ else
+ {
+ ret = x->dfhe_left;
+ }
+
+ /* fix the parent pointer */
+ if ( x->dfhe_p != NULL && x->dfhe_p->dfhe_child == x )
+ {
+ x->dfhe_p->dfhe_child = ret;
+ }
+
+ x->dfhe_right->dfhe_left = x->dfhe_left;
+ x->dfhe_left->dfhe_right = x->dfhe_right;
+ /* clear out hanging pointers */
+ x->dfhe_p = NULL;
+ x->dfhe_left = x;
+ x->dfhe_right = x;
+ return ret;
}
-static void dfh_checkcons ( DFibHeap * h )
+static void dfh_checkcons ( DFibHeap *h )
{
- IDnum oDl;
-
- /* make sure we have enough memory allocated to "reorganize" */
- if ( h->dfh_Dl == -1 || h->dfh_n > ( 1 << h->dfh_Dl ) )
- {
- oDl = h->dfh_Dl;
-
- if ( ( h->dfh_Dl = ceillog2 ( h->dfh_n ) + 1 ) < 8 )
- {
- h->dfh_Dl = 8;
- }
-
- if ( oDl != h->dfh_Dl )
- { h->dfh_cons = ( DFibHeapNode ** ) realloc ( h->dfh_cons, sizeof * h->dfh_cons * ( h->dfh_Dl + 1 ) ); }
-
- if ( h->dfh_cons == NULL )
- {
- abort ();
- }
- }
+ IDnum oDl;
+
+ /* make sure we have enough memory allocated to "reorganize" */
+ if ( h->dfh_Dl == -1 || h->dfh_n > ( 1 << h->dfh_Dl ) )
+ {
+ oDl = h->dfh_Dl;
+
+ if ( ( h->dfh_Dl = ceillog2 ( h->dfh_n ) + 1 ) < 8 )
+ {
+ h->dfh_Dl = 8;
+ }
+
+ if ( oDl != h->dfh_Dl )
+ {
+ h->dfh_cons = ( DFibHeapNode ** ) realloc ( h->dfh_cons, sizeof * h->dfh_cons * ( h->dfh_Dl + 1 ) );
+ }
+
+ if ( h->dfh_cons == NULL )
+ {
+ abort ();
+ }
+ }
}
-static int dfh_compare ( DFibHeap * h, DFibHeapNode * a, DFibHeapNode * b )
+static int dfh_compare ( DFibHeap *h, DFibHeapNode *a, DFibHeapNode *b )
{
- if ( a->dfhe_key < b->dfhe_key )
- {
- return -1;
- }
+ if ( a->dfhe_key < b->dfhe_key )
+ {
+ return -1;
+ }
- if ( a->dfhe_key == b->dfhe_key )
- {
- return 0;
- }
+ if ( a->dfhe_key == b->dfhe_key )
+ {
+ return 0;
+ }
- return 1;
+ return 1;
}
-static int dfh_comparedata ( DFibHeap * h, Time key, unsigned int data, DFibHeapNode * b )
+static int dfh_comparedata ( DFibHeap *h, Time key, unsigned int data, DFibHeapNode *b )
{
- DFibHeapNode a;
- a.dfhe_key = key;
- a.dfhe_data = data;
- return dfh_compare ( h, &a, b );
+ DFibHeapNode a;
+ a.dfhe_key = key;
+ a.dfhe_data = data;
+ return dfh_compare ( h, &a, b );
}
-static void dfh_insertel ( DFibHeap * h, DFibHeapNode * x )
+static void dfh_insertel ( DFibHeap *h, DFibHeapNode *x )
{
- dfh_insertrootlist ( h, x );
+ dfh_insertrootlist ( h, x );
- if ( h->dfh_min == NULL || x->dfhe_key < h->dfh_min->dfhe_key )
- {
- h->dfh_min = x;
- }
+ if ( h->dfh_min == NULL || x->dfhe_key < h->dfh_min->dfhe_key )
+ {
+ h->dfh_min = x;
+ }
- h->dfh_n++;
+ h->dfh_n++;
}
-Time dfibheap_el_getKey ( DFibHeapNode * node )
+Time dfibheap_el_getKey ( DFibHeapNode *node )
{
- return node->dfhe_key;
+ return node->dfhe_key;
}
diff --git a/standardPregraph/dfibHeap.c b/standardPregraph/dfibHeap.c
index 900171d..51749e8 100644
--- a/standardPregraph/dfibHeap.c
+++ b/standardPregraph/dfibHeap.c
@@ -1,7 +1,7 @@
/*
* dfibHeap.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,32 +27,32 @@
#include "dfib.h"
// Return number of elements stored in heap
-IDnum getDFibHeapSize ( DFibHeap * heap )
+IDnum getDFibHeapSize ( DFibHeap *heap )
{
- return dfibheap_getSize ( heap );
+ return dfibheap_getSize ( heap );
}
// Constructor
// Memory allocated
-DFibHeap * newDFibHeap ()
+DFibHeap *newDFibHeap ()
{
- return dfh_makekeyheap ();
+ return dfh_makekeyheap ();
}
// Add new node into heap with a key, and a pointer to the specified node
-DFibHeapNode * insertNodeIntoDHeap ( DFibHeap * heap, Time key, unsigned int node )
+DFibHeapNode *insertNodeIntoDHeap ( DFibHeap *heap, Time key, unsigned int node )
{
- DFibHeapNode * res;
- res = dfh_insertkey ( heap, key, node );
- return res;
+ DFibHeapNode *res;
+ res = dfh_insertkey ( heap, key, node );
+ return res;
}
// Replaces the key for a given node
-Time replaceKeyInDHeap ( DFibHeap * heap, DFibHeapNode * node, Time newKey )
+Time replaceKeyInDHeap ( DFibHeap *heap, DFibHeapNode *node, Time newKey )
{
- Time res;
- res = dfh_replacekey ( heap, node, newKey );
- return res;
+ Time res;
+ res = dfh_replacekey ( heap, node, newKey );
+ return res;
}
@@ -68,32 +68,32 @@ Output:
Return:
The key.
*************************************************/
-unsigned int removeNextNodeFromDHeap ( DFibHeap * heap )
+unsigned int removeNextNodeFromDHeap ( DFibHeap *heap )
{
- unsigned int node;
- node = ( unsigned int ) dfh_extractmin ( heap );
- return node;
+ unsigned int node;
+ node = ( unsigned int ) dfh_extractmin ( heap );
+ return node;
}
// Destructor
-void destroyDHeap ( DFibHeap * heap )
+void destroyDHeap ( DFibHeap *heap )
{
- dfh_deleteheap ( heap );
+ dfh_deleteheap ( heap );
}
// Replace the node pointed to by a heap node
-void replaceValueInDHeap ( DFibHeapNode * node, unsigned int newValue )
+void replaceValueInDHeap ( DFibHeapNode *node, unsigned int newValue )
{
- dfh_replacedata ( node, newValue );
+ dfh_replacedata ( node, newValue );
}
// Remove unwanted node
-void destroyNodeInDHeap ( DFibHeapNode * node, DFibHeap * heap )
+void destroyNodeInDHeap ( DFibHeapNode *node, DFibHeap *heap )
{
- dfh_delete ( heap, node );
+ dfh_delete ( heap, node );
}
-Time getKey ( DFibHeapNode * node )
+Time getKey ( DFibHeapNode *node )
{
- return dfibheap_el_getKey ( node );
+ return dfibheap_el_getKey ( node );
}
diff --git a/standardPregraph/fib.c b/standardPregraph/fib.c
index 999b28a..2111a94 100644
--- a/standardPregraph/fib.c
+++ b/standardPregraph/fib.c
@@ -41,289 +41,289 @@
#define HEAPBLOCKSIZE 10000
-static int fh_comparedata ( FibHeap * h, Coordinate key, unsigned int data, FibHeapNode * b );
-unsigned int fh_replacekeydata ( FibHeap * h, FibHeapNode * x, Coordinate key, unsigned int data );
+static int fh_comparedata ( FibHeap *h, Coordinate key, unsigned int data, FibHeapNode *b );
+unsigned int fh_replacekeydata ( FibHeap *h, FibHeapNode *x, Coordinate key, unsigned int data );
-static FibHeapNode * allocateFibHeapEl ( FibHeap * heap )
+static FibHeapNode *allocateFibHeapEl ( FibHeap *heap )
{
- return ( FibHeapNode * ) getItem ( heap->nodeMemory );
+ return ( FibHeapNode * ) getItem ( heap->nodeMemory );
};
-static void deallocateFibHeapEl ( FibHeapNode * a, FibHeap * heap )
+static void deallocateFibHeapEl ( FibHeapNode *a, FibHeap *heap )
{
- returnItem ( heap->nodeMemory, a );
+ returnItem ( heap->nodeMemory, a );
}
#define swap(type, a, b) \
- do { \
- type c; \
- c = a; \
- a = b; \
- b = c; \
- } while (0) \
-
+ do { \
+ type c; \
+ c = a; \
+ a = b; \
+ b = c; \
+ } while (0) \
+
#define INT_BITS (sizeof(IDnum) * 8)
static inline IDnum ceillog2 ( IDnum a )
{
- IDnum oa;
- IDnum i;
- IDnum b;
- IDnum cons;
- oa = a;
- b = INT_BITS / 2;
- i = 0;
-
- while ( b )
- {
- i = ( i << 1 );
- cons = ( ( IDnum ) 1 ) << b;
-
- if ( a >= cons )
- {
- a /= cons;
- i = i | 1;
- }
- else
- {
- a &= cons - 1;
- }
-
- b /= 2;
- }
-
- if ( ( ( ( IDnum ) 1 << i ) ) == oa )
- {
- return i;
- }
- else
- {
- return i + 1;
- }
+ IDnum oa;
+ IDnum i;
+ IDnum b;
+ IDnum cons;
+ oa = a;
+ b = INT_BITS / 2;
+ i = 0;
+
+ while ( b )
+ {
+ i = ( i << 1 );
+ cons = ( ( IDnum ) 1 ) << b;
+
+ if ( a >= cons )
+ {
+ a /= cons;
+ i = i | 1;
+ }
+ else
+ {
+ a &= cons - 1;
+ }
+
+ b /= 2;
+ }
+
+ if ( ( ( ( IDnum ) 1 << i ) ) == oa )
+ {
+ return i;
+ }
+ else
+ {
+ return i + 1;
+ }
}
/*
* Private Heap Functions
*/
-static void fh_initheap ( FibHeap * new )
+static void fh_initheap ( FibHeap *new )
{
- new->fh_cmp_fnct = NULL;
- new->nodeMemory = createMem_manager ( sizeof ( FibHeapNode ), HEAPBLOCKSIZE );
- new->fh_neginf = 0;
- new->fh_n = 0;
- new->fh_Dl = -1;
- new->fh_cons = NULL;
- new->fh_min = NULL;
- new->fh_root = NULL;
- new->fh_keys = 0;
+ new->fh_cmp_fnct = NULL;
+ new->nodeMemory = createMem_manager ( sizeof ( FibHeapNode ), HEAPBLOCKSIZE );
+ new->fh_neginf = 0;
+ new->fh_n = 0;
+ new->fh_Dl = -1;
+ new->fh_cons = NULL;
+ new->fh_min = NULL;
+ new->fh_root = NULL;
+ new->fh_keys = 0;
}
-static void fh_destroyheap ( FibHeap * h )
+static void fh_destroyheap ( FibHeap *h )
{
- h->fh_cmp_fnct = NULL;
- h->fh_neginf = 0;
+ h->fh_cmp_fnct = NULL;
+ h->fh_neginf = 0;
- if ( h->fh_cons != NULL )
- {
- free ( h->fh_cons );
- }
+ if ( h->fh_cons != NULL )
+ {
+ free ( h->fh_cons );
+ }
- h->fh_cons = NULL;
- free ( h );
+ h->fh_cons = NULL;
+ free ( h );
}
/*
* Public Heap Functions
*/
-FibHeap * fh_makekeyheap ()
+FibHeap *fh_makekeyheap ()
{
- FibHeap * n;
+ FibHeap *n;
- if ( ( n = malloc ( sizeof * n ) ) == NULL )
- {
- return NULL;
- }
+ if ( ( n = malloc ( sizeof * n ) ) == NULL )
+ {
+ return NULL;
+ }
- fh_initheap ( n );
- n->fh_keys = 1;
- return n;
+ fh_initheap ( n );
+ n->fh_keys = 1;
+ return n;
}
-FibHeap * fh_makeheap ()
+FibHeap *fh_makeheap ()
{
- FibHeap * n;
+ FibHeap *n;
- if ( ( n = malloc ( sizeof * n ) ) == NULL )
- {
- return NULL;
- }
+ if ( ( n = malloc ( sizeof * n ) ) == NULL )
+ {
+ return NULL;
+ }
- fh_initheap ( n );
- return n;
+ fh_initheap ( n );
+ return n;
}
-voidcmp fh_setcmp ( FibHeap * h, voidcmp fnct )
+voidcmp fh_setcmp ( FibHeap *h, voidcmp fnct )
{
- voidcmp oldfnct;
- oldfnct = h->fh_cmp_fnct;
- h->fh_cmp_fnct = fnct;
- return oldfnct;
+ voidcmp oldfnct;
+ oldfnct = h->fh_cmp_fnct;
+ h->fh_cmp_fnct = fnct;
+ return oldfnct;
}
-unsigned int fh_setneginf ( FibHeap * h, unsigned int data )
+unsigned int fh_setneginf ( FibHeap *h, unsigned int data )
{
- unsigned int old;
- old = h->fh_neginf;
- h->fh_neginf = data;
- return old;
+ unsigned int old;
+ old = h->fh_neginf;
+ h->fh_neginf = data;
+ return old;
}
-FibHeap * fh_union ( FibHeap * ha, FibHeap * hb )
+FibHeap *fh_union ( FibHeap *ha, FibHeap *hb )
{
- FibHeapNode * x;
+ FibHeapNode *x;
- if ( ha->fh_root == NULL || hb->fh_root == NULL )
- {
- /* either one or both are empty */
- if ( ha->fh_root == NULL )
- {
- fh_destroyheap ( ha );
- return hb;
- }
- else
- {
- fh_destroyheap ( hb );
- return ha;
- }
- }
+ if ( ha->fh_root == NULL || hb->fh_root == NULL )
+ {
+ /* either one or both are empty */
+ if ( ha->fh_root == NULL )
+ {
+ fh_destroyheap ( ha );
+ return hb;
+ }
+ else
+ {
+ fh_destroyheap ( hb );
+ return ha;
+ }
+ }
- ha->fh_root->fhe_left->fhe_right = hb->fh_root;
- hb->fh_root->fhe_left->fhe_right = ha->fh_root;
- x = ha->fh_root->fhe_left;
- ha->fh_root->fhe_left = hb->fh_root->fhe_left;
- hb->fh_root->fhe_left = x;
- ha->fh_n += hb->fh_n;
- /*
- * we probably should also keep stats on number of unions
- */
+ ha->fh_root->fhe_left->fhe_right = hb->fh_root;
+ hb->fh_root->fhe_left->fhe_right = ha->fh_root;
+ x = ha->fh_root->fhe_left;
+ ha->fh_root->fhe_left = hb->fh_root->fhe_left;
+ hb->fh_root->fhe_left = x;
+ ha->fh_n += hb->fh_n;
+ /*
+ * we probably should also keep stats on number of unions
+ */
- /* set fh_min if necessary */
- if ( fh_compare ( ha, hb->fh_min, ha->fh_min ) < 0 )
- {
- ha->fh_min = hb->fh_min;
- }
+ /* set fh_min if necessary */
+ if ( fh_compare ( ha, hb->fh_min, ha->fh_min ) < 0 )
+ {
+ ha->fh_min = hb->fh_min;
+ }
- fh_destroyheap ( hb );
- return ha;
+ fh_destroyheap ( hb );
+ return ha;
}
-void fh_deleteheap ( FibHeap * h )
+void fh_deleteheap ( FibHeap *h )
{
- freeMem_manager ( h->nodeMemory );
- h->nodeMemory = NULL;
- fh_destroyheap ( h );
+ freeMem_manager ( h->nodeMemory );
+ h->nodeMemory = NULL;
+ fh_destroyheap ( h );
}
/*
* Public Key Heap Functions
*/
-FibHeapNode * fh_insertkey ( FibHeap * h, Coordinate key, unsigned int data )
+FibHeapNode *fh_insertkey ( FibHeap *h, Coordinate key, unsigned int data )
{
- FibHeapNode * x;
+ FibHeapNode *x;
- if ( ( x = fhe_newelem ( h ) ) == NULL )
- {
- return NULL;
- }
+ if ( ( x = fhe_newelem ( h ) ) == NULL )
+ {
+ return NULL;
+ }
- /* just insert on root list, and make sure it's not the new min */
- x->fhe_data = data;
- x->fhe_key = key;
- fh_insertel ( h, x );
- return x;
+ /* just insert on root list, and make sure it's not the new min */
+ x->fhe_data = data;
+ x->fhe_key = key;
+ fh_insertel ( h, x );
+ return x;
}
-boolean fh_isempty ( FibHeap * h )
+boolean fh_isempty ( FibHeap *h )
{
- if ( h->fh_min == NULL )
- {
- return 1;
- }
- else
- {
- return 0;
- }
+ if ( h->fh_min == NULL )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
-Coordinate fh_minkey ( FibHeap * h )
+Coordinate fh_minkey ( FibHeap *h )
{
- if ( h->fh_min == NULL )
- {
- return INT_MIN;
- }
+ if ( h->fh_min == NULL )
+ {
+ return INT_MIN;
+ }
- return h->fh_min->fhe_key;
+ return h->fh_min->fhe_key;
}
-unsigned int fh_replacekeydata ( FibHeap * h, FibHeapNode * x, Coordinate key, unsigned int data )
+unsigned int fh_replacekeydata ( FibHeap *h, FibHeapNode *x, Coordinate key, unsigned int data )
{
- unsigned int odata;
- Coordinate okey;
- FibHeapNode * y;
- int r;
- odata = x->fhe_data;
- okey = x->fhe_key;
+ unsigned int odata;
+ Coordinate okey;
+ FibHeapNode *y;
+ int r;
+ odata = x->fhe_data;
+ okey = x->fhe_key;
- /*
- * we can increase a key by deleting and reinserting, that
- * requires O(lgn) time.
- */
- if ( ( r = fh_comparedata ( h, key, data, x ) ) > 0 )
- {
- /* XXX - bad code! */
- abort ();
- }
+ /*
+ * we can increase a key by deleting and reinserting, that
+ * requires O(lgn) time.
+ */
+ if ( ( r = fh_comparedata ( h, key, data, x ) ) > 0 )
+ {
+ /* XXX - bad code! */
+ abort ();
+ }
- x->fhe_data = data;
- x->fhe_key = key;
+ x->fhe_data = data;
+ x->fhe_key = key;
- /* because they are equal, we don't have to do anything */
- if ( r == 0 )
- {
- return odata;
- }
+ /* because they are equal, we don't have to do anything */
+ if ( r == 0 )
+ {
+ return odata;
+ }
- y = x->fhe_p;
+ y = x->fhe_p;
- if ( h->fh_keys && okey == key )
- {
- return odata;
- }
+ if ( h->fh_keys && okey == key )
+ {
+ return odata;
+ }
- if ( y != NULL && fh_compare ( h, x, y ) <= 0 )
- {
- fh_cut ( h, x, y );
- fh_cascading_cut ( h, y );
- }
+ if ( y != NULL && fh_compare ( h, x, y ) <= 0 )
+ {
+ fh_cut ( h, x, y );
+ fh_cascading_cut ( h, y );
+ }
- /*
- * the = is so that the call from fh_delete will delete the proper
- * element.
- */
- if ( fh_compare ( h, x, h->fh_min ) <= 0 )
- {
- h->fh_min = x;
- }
+ /*
+ * the = is so that the call from fh_delete will delete the proper
+ * element.
+ */
+ if ( fh_compare ( h, x, h->fh_min ) <= 0 )
+ {
+ h->fh_min = x;
+ }
- return odata;
+ return odata;
}
-Coordinate fh_replacekey ( FibHeap * h, FibHeapNode * x, Coordinate key )
+Coordinate fh_replacekey ( FibHeap *h, FibHeapNode *x, Coordinate key )
{
- Coordinate ret;
- ret = x->fhe_key;
- ( void ) fh_replacekeydata ( h, x, key, x->fhe_data );
- return ret;
+ Coordinate ret;
+ ret = x->fhe_key;
+ ( void ) fh_replacekeydata ( h, x, key, x->fhe_data );
+ return ret;
}
/*
@@ -334,376 +334,378 @@ Coordinate fh_replacekey ( FibHeap * h, FibHeapNode * x, Coordinate key )
* NULL failed for some reason
* ptr token to use for manipulation of data
*/
-FibHeapNode * fh_insert ( FibHeap * h, unsigned int data )
+FibHeapNode *fh_insert ( FibHeap *h, unsigned int data )
{
- FibHeapNode * x;
+ FibHeapNode *x;
- if ( ( x = fhe_newelem ( h ) ) == NULL )
- {
- return NULL;
- }
+ if ( ( x = fhe_newelem ( h ) ) == NULL )
+ {
+ return NULL;
+ }
- /* just insert on root list, and make sure it's not the new min */
- x->fhe_data = data;
- fh_insertel ( h, x );
- return x;
+ /* just insert on root list, and make sure it's not the new min */
+ x->fhe_data = data;
+ fh_insertel ( h, x );
+ return x;
}
-unsigned int fh_min ( FibHeap * h )
+unsigned int fh_min ( FibHeap *h )
{
- if ( h->fh_min == NULL )
- {
- return 0;
- }
+ if ( h->fh_min == NULL )
+ {
+ return 0;
+ }
- return h->fh_min->fhe_data;
+ return h->fh_min->fhe_data;
}
-unsigned int fh_extractmin ( FibHeap * h )
+unsigned int fh_extractmin ( FibHeap *h )
{
- FibHeapNode * z;
- unsigned int ret = 0;
+ FibHeapNode *z;
+ unsigned int ret = 0;
- if ( h->fh_min != NULL )
- {
- z = fh_extractminel ( h );
- ret = z->fhe_data;
+ if ( h->fh_min != NULL )
+ {
+ z = fh_extractminel ( h );
+ ret = z->fhe_data;
#ifndef NO_FREE
- deallocateFibHeapEl ( z, h );
+ deallocateFibHeapEl ( z, h );
#endif
- }
+ }
- return ret;
+ return ret;
}
-unsigned int fh_replacedata ( FibHeapNode * x, unsigned int data )
+unsigned int fh_replacedata ( FibHeapNode *x, unsigned int data )
{
- unsigned int odata = x->fhe_data;
- x->fhe_data = data;
- return odata;
+ unsigned int odata = x->fhe_data;
+ x->fhe_data = data;
+ return odata;
}
-unsigned int fh_delete ( FibHeap * h, FibHeapNode * x )
+unsigned int fh_delete ( FibHeap *h, FibHeapNode *x )
{
- unsigned int k;
- k = x->fhe_data;
+ unsigned int k;
+ k = x->fhe_data;
- if ( !h->fh_keys )
- {
- fh_replacedata ( x, h->fh_neginf );
- }
- else
- {
- fh_replacekey ( h, x, INT_MIN );
- }
+ if ( !h->fh_keys )
+ {
+ fh_replacedata ( x, h->fh_neginf );
+ }
+ else
+ {
+ fh_replacekey ( h, x, INT_MIN );
+ }
- fh_extractmin ( h );
- return k;
+ fh_extractmin ( h );
+ return k;
}
/*
* begin of private element fuctions
*/
-static FibHeapNode * fh_extractminel ( FibHeap * h )
-{
- FibHeapNode * ret;
- FibHeapNode * x, *y, *orig;
- ret = h->fh_min;
- orig = NULL;
-
- /* put all the children on the root list */
- /* for true consistancy, we should use fhe_remove */
- for ( x = ret->fhe_child; x != orig && x != NULL; )
- {
- if ( orig == NULL )
- {
- orig = x;
- }
-
- y = x->fhe_right;
- x->fhe_p = NULL;
- fh_insertrootlist ( h, x );
- x = y;
- }
-
- /* remove minimum from root list */
- fh_removerootlist ( h, ret );
- h->fh_n--;
-
- /* if we aren't empty, consolidate the heap */
- if ( h->fh_n == 0 )
- {
- h->fh_min = NULL;
- }
- else
- {
- h->fh_min = ret->fhe_right;
- fh_consolidate ( h );
- }
-
- return ret;
-}
-
-static void fh_insertrootlist ( FibHeap * h, FibHeapNode * x )
-{
- if ( h->fh_root == NULL )
- {
- h->fh_root = x;
- x->fhe_left = x;
- x->fhe_right = x;
- return;
- }
-
- fhe_insertafter ( h->fh_root, x );
-}
-
-static void fh_removerootlist ( FibHeap * h, FibHeapNode * x )
-{
- if ( x->fhe_left == x )
- {
- h->fh_root = NULL;
- }
- else
- {
- h->fh_root = fhe_remove ( x );
- }
-}
-
-static void fh_consolidate ( FibHeap * h )
-{
- FibHeapNode ** a;
- FibHeapNode * w;
- FibHeapNode * y;
- FibHeapNode * x;
- IDnum i;
- IDnum d;
- IDnum D;
- fh_checkcons ( h );
- /* assign a the value of h->fh_cons so I don't have to rewrite code */
- D = h->fh_Dl + 1;
- a = h->fh_cons;
-
- for ( i = 0; i < D; i++ )
- {
- a[i] = NULL;
- }
-
- while ( ( w = h->fh_root ) != NULL )
- {
- x = w;
- fh_removerootlist ( h, w );
- d = x->fhe_degree;
-
- /* XXX - assert that d < D */
- while ( a[d] != NULL )
- {
- y = a[d];
-
- if ( fh_compare ( h, x, y ) > 0 )
- {
- swap ( FibHeapNode *, x, y );
- }
-
- fh_heaplink ( h, y, x );
- a[d] = NULL;
- d++;
- }
-
- a[d] = x;
- }
-
- h->fh_min = NULL;
-
- for ( i = 0; i < D; i++ )
- if ( a[i] != NULL )
- {
- fh_insertrootlist ( h, a[i] );
-
- if ( h->fh_min == NULL || fh_compare ( h, a[i], h->fh_min ) < 0 )
- {
- h->fh_min = a[i];
- }
- }
-}
-
-static void fh_heaplink ( FibHeap * h, FibHeapNode * y, FibHeapNode * x )
-{
- /* make y a child of x */
- if ( x->fhe_child == NULL )
- {
- x->fhe_child = y;
- }
- else
- {
- fhe_insertbefore ( x->fhe_child, y );
- }
+static FibHeapNode *fh_extractminel ( FibHeap *h )
+{
+ FibHeapNode *ret;
+ FibHeapNode *x, *y, *orig;
+ ret = h->fh_min;
+ orig = NULL;
+
+ /* put all the children on the root list */
+ /* for true consistancy, we should use fhe_remove */
+ for ( x = ret->fhe_child; x != orig && x != NULL; )
+ {
+ if ( orig == NULL )
+ {
+ orig = x;
+ }
+
+ y = x->fhe_right;
+ x->fhe_p = NULL;
+ fh_insertrootlist ( h, x );
+ x = y;
+ }
+
+ /* remove minimum from root list */
+ fh_removerootlist ( h, ret );
+ h->fh_n--;
+
+ /* if we aren't empty, consolidate the heap */
+ if ( h->fh_n == 0 )
+ {
+ h->fh_min = NULL;
+ }
+ else
+ {
+ h->fh_min = ret->fhe_right;
+ fh_consolidate ( h );
+ }
+
+ return ret;
+}
+
+static void fh_insertrootlist ( FibHeap *h, FibHeapNode *x )
+{
+ if ( h->fh_root == NULL )
+ {
+ h->fh_root = x;
+ x->fhe_left = x;
+ x->fhe_right = x;
+ return;
+ }
+
+ fhe_insertafter ( h->fh_root, x );
+}
+
+static void fh_removerootlist ( FibHeap *h, FibHeapNode *x )
+{
+ if ( x->fhe_left == x )
+ {
+ h->fh_root = NULL;
+ }
+ else
+ {
+ h->fh_root = fhe_remove ( x );
+ }
+}
+
+static void fh_consolidate ( FibHeap *h )
+{
+ FibHeapNode **a;
+ FibHeapNode *w;
+ FibHeapNode *y;
+ FibHeapNode *x;
+ IDnum i;
+ IDnum d;
+ IDnum D;
+ fh_checkcons ( h );
+ /* assign a the value of h->fh_cons so I don't have to rewrite code */
+ D = h->fh_Dl + 1;
+ a = h->fh_cons;
+
+ for ( i = 0; i < D; i++ )
+ {
+ a[i] = NULL;
+ }
+
+ while ( ( w = h->fh_root ) != NULL )
+ {
+ x = w;
+ fh_removerootlist ( h, w );
+ d = x->fhe_degree;
+
+ /* XXX - assert that d < D */
+ while ( a[d] != NULL )
+ {
+ y = a[d];
+
+ if ( fh_compare ( h, x, y ) > 0 )
+ {
+ swap ( FibHeapNode *, x, y );
+ }
+
+ fh_heaplink ( h, y, x );
+ a[d] = NULL;
+ d++;
+ }
+
+ a[d] = x;
+ }
+
+ h->fh_min = NULL;
+
+ for ( i = 0; i < D; i++ )
+ if ( a[i] != NULL )
+ {
+ fh_insertrootlist ( h, a[i] );
+
+ if ( h->fh_min == NULL || fh_compare ( h, a[i], h->fh_min ) < 0 )
+ {
+ h->fh_min = a[i];
+ }
+ }
+}
+
+static void fh_heaplink ( FibHeap *h, FibHeapNode *y, FibHeapNode *x )
+{
+ /* make y a child of x */
+ if ( x->fhe_child == NULL )
+ {
+ x->fhe_child = y;
+ }
+ else
+ {
+ fhe_insertbefore ( x->fhe_child, y );
+ }
- y->fhe_p = x;
- x->fhe_degree++;
- y->fhe_mark = 0;
+ y->fhe_p = x;
+ x->fhe_degree++;
+ y->fhe_mark = 0;
}
-static void fh_cut ( FibHeap * h, FibHeapNode * x, FibHeapNode * y )
-{
- fhe_remove ( x );
- y->fhe_degree--;
- fh_insertrootlist ( h, x );
- x->fhe_p = NULL;
- x->fhe_mark = 0;
-}
-
-static void fh_cascading_cut ( FibHeap * h, FibHeapNode * y )
+static void fh_cut ( FibHeap *h, FibHeapNode *x, FibHeapNode *y )
+{
+ fhe_remove ( x );
+ y->fhe_degree--;
+ fh_insertrootlist ( h, x );
+ x->fhe_p = NULL;
+ x->fhe_mark = 0;
+}
+
+static void fh_cascading_cut ( FibHeap *h, FibHeapNode *y )
{
- FibHeapNode * z;
-
- while ( ( z = y->fhe_p ) != NULL )
- {
- if ( y->fhe_mark == 0 )
- {
- y->fhe_mark = 1;
- return;
- }
- else
- {
- fh_cut ( h, y, z );
- y = z;
- }
- }
+ FibHeapNode *z;
+
+ while ( ( z = y->fhe_p ) != NULL )
+ {
+ if ( y->fhe_mark == 0 )
+ {
+ y->fhe_mark = 1;
+ return;
+ }
+ else
+ {
+ fh_cut ( h, y, z );
+ y = z;
+ }
+ }
}
/*
* begining of handling elements of fibheap
*/
-static FibHeapNode * fhe_newelem ( FibHeap * h )
+static FibHeapNode *fhe_newelem ( FibHeap *h )
{
- FibHeapNode * e;
+ FibHeapNode *e;
- if ( ( e = allocateFibHeapEl ( h ) ) == NULL )
- {
- return NULL;
- }
+ if ( ( e = allocateFibHeapEl ( h ) ) == NULL )
+ {
+ return NULL;
+ }
- fhe_initelem ( e );
- return e;
+ fhe_initelem ( e );
+ return e;
}
-static void fhe_initelem ( FibHeapNode * e )
+static void fhe_initelem ( FibHeapNode *e )
{
- e->fhe_degree = 0;
- e->fhe_mark = 0;
- e->fhe_p = NULL;
- e->fhe_child = NULL;
- e->fhe_left = e;
- e->fhe_right = e;
- e->fhe_data = 0;
+ e->fhe_degree = 0;
+ e->fhe_mark = 0;
+ e->fhe_p = NULL;
+ e->fhe_child = NULL;
+ e->fhe_left = e;
+ e->fhe_right = e;
+ e->fhe_data = 0;
}
-static void fhe_insertafter ( FibHeapNode * a, FibHeapNode * b )
+static void fhe_insertafter ( FibHeapNode *a, FibHeapNode *b )
{
- if ( a == a->fhe_right )
- {
- a->fhe_right = b;
- a->fhe_left = b;
- b->fhe_right = a;
- b->fhe_left = a;
- }
- else
- {
- b->fhe_right = a->fhe_right;
- a->fhe_right->fhe_left = b;
- a->fhe_right = b;
- b->fhe_left = a;
- }
+ if ( a == a->fhe_right )
+ {
+ a->fhe_right = b;
+ a->fhe_left = b;
+ b->fhe_right = a;
+ b->fhe_left = a;
+ }
+ else
+ {
+ b->fhe_right = a->fhe_right;
+ a->fhe_right->fhe_left = b;
+ a->fhe_right = b;
+ b->fhe_left = a;
+ }
}
-static inline void fhe_insertbefore ( FibHeapNode * a, FibHeapNode * b )
+static inline void fhe_insertbefore ( FibHeapNode *a, FibHeapNode *b )
{
- fhe_insertafter ( a->fhe_left, b );
+ fhe_insertafter ( a->fhe_left, b );
}
-static FibHeapNode * fhe_remove ( FibHeapNode * x )
+static FibHeapNode *fhe_remove ( FibHeapNode *x )
{
- FibHeapNode * ret;
+ FibHeapNode *ret;
- if ( x == x->fhe_left )
- {
- ret = NULL;
- }
- else
- {
- ret = x->fhe_left;
- }
+ if ( x == x->fhe_left )
+ {
+ ret = NULL;
+ }
+ else
+ {
+ ret = x->fhe_left;
+ }
- /* fix the parent pointer */
- if ( x->fhe_p != NULL && x->fhe_p->fhe_child == x )
- {
- x->fhe_p->fhe_child = ret;
- }
+ /* fix the parent pointer */
+ if ( x->fhe_p != NULL && x->fhe_p->fhe_child == x )
+ {
+ x->fhe_p->fhe_child = ret;
+ }
- x->fhe_right->fhe_left = x->fhe_left;
- x->fhe_left->fhe_right = x->fhe_right;
- /* clear out hanging pointers */
- x->fhe_p = NULL;
- x->fhe_left = x;
- x->fhe_right = x;
- return ret;
+ x->fhe_right->fhe_left = x->fhe_left;
+ x->fhe_left->fhe_right = x->fhe_right;
+ /* clear out hanging pointers */
+ x->fhe_p = NULL;
+ x->fhe_left = x;
+ x->fhe_right = x;
+ return ret;
}
-static void fh_checkcons ( FibHeap * h )
+static void fh_checkcons ( FibHeap *h )
{
- IDnum oDl;
+ IDnum oDl;
- /* make sure we have enough memory allocated to "reorganize" */
- if ( h->fh_Dl == -1 || h->fh_n > ( 1 << h->fh_Dl ) )
- {
- oDl = h->fh_Dl;
+ /* make sure we have enough memory allocated to "reorganize" */
+ if ( h->fh_Dl == -1 || h->fh_n > ( 1 << h->fh_Dl ) )
+ {
+ oDl = h->fh_Dl;
- if ( ( h->fh_Dl = ceillog2 ( h->fh_n ) + 1 ) < 8 )
- {
- h->fh_Dl = 8;
- }
+ if ( ( h->fh_Dl = ceillog2 ( h->fh_n ) + 1 ) < 8 )
+ {
+ h->fh_Dl = 8;
+ }
- if ( oDl != h->fh_Dl )
- { h->fh_cons = ( FibHeapNode ** ) realloc ( h->fh_cons, sizeof * h->fh_cons * ( h->fh_Dl + 1 ) ); }
+ if ( oDl != h->fh_Dl )
+ {
+ h->fh_cons = ( FibHeapNode ** ) realloc ( h->fh_cons, sizeof * h->fh_cons * ( h->fh_Dl + 1 ) );
+ }
- if ( h->fh_cons == NULL )
- {
- abort ();
- }
- }
+ if ( h->fh_cons == NULL )
+ {
+ abort ();
+ }
+ }
}
-static int fh_compare ( FibHeap * h, FibHeapNode * a, FibHeapNode * b )
+static int fh_compare ( FibHeap *h, FibHeapNode *a, FibHeapNode *b )
{
- if ( a->fhe_key < b->fhe_key )
- {
- return -1;
- }
+ if ( a->fhe_key < b->fhe_key )
+ {
+ return -1;
+ }
- if ( a->fhe_key == b->fhe_key )
- {
- return 0;
- }
+ if ( a->fhe_key == b->fhe_key )
+ {
+ return 0;
+ }
- return 1;
+ return 1;
}
-static int fh_comparedata ( FibHeap * h, Coordinate key, unsigned int data, FibHeapNode * b )
+static int fh_comparedata ( FibHeap *h, Coordinate key, unsigned int data, FibHeapNode *b )
{
- FibHeapNode a;
- a.fhe_key = key;
- a.fhe_data = data;
- return fh_compare ( h, &a, b );
+ FibHeapNode a;
+ a.fhe_key = key;
+ a.fhe_data = data;
+ return fh_compare ( h, &a, b );
}
-static void fh_insertel ( FibHeap * h, FibHeapNode * x )
+static void fh_insertel ( FibHeap *h, FibHeapNode *x )
{
- fh_insertrootlist ( h, x );
+ fh_insertrootlist ( h, x );
- if ( h->fh_min == NULL || ( h->fh_keys ? x->fhe_key < h->fh_min->fhe_key : h->fh_cmp_fnct ( x->fhe_data, h->fh_min->fhe_data ) < 0 ) )
- {
- h->fh_min = x;
- }
+ if ( h->fh_min == NULL || ( h->fh_keys ? x->fhe_key < h->fh_min->fhe_key : h->fh_cmp_fnct ( x->fhe_data, h->fh_min->fhe_data ) < 0 ) )
+ {
+ h->fh_min = x;
+ }
- h->fh_n++;
+ h->fh_n++;
}
diff --git a/standardPregraph/fibHeap.c b/standardPregraph/fibHeap.c
index f690fb8..7ade701 100644
--- a/standardPregraph/fibHeap.c
+++ b/standardPregraph/fibHeap.c
@@ -1,7 +1,7 @@
/*
* fibHeap.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -24,54 +24,54 @@
// Constructor
// Memory allocated
-FibHeap * newFibHeap ()
+FibHeap *newFibHeap ()
{
- return fh_makekeyheap ();
+ return fh_makekeyheap ();
}
// Add new node into heap with a key, and a pointer to the specified node
-FibHeapNode * insertNodeIntoHeap ( FibHeap * heap, Coordinate key, unsigned int node )
+FibHeapNode *insertNodeIntoHeap ( FibHeap *heap, Coordinate key, unsigned int node )
{
- return fh_insertkey ( heap, key, node );
+ return fh_insertkey ( heap, key, node );
}
// Returns smallest key in heap
-Coordinate minKeyOfHeap ( FibHeap * heap )
+Coordinate minKeyOfHeap ( FibHeap *heap )
{
- return fh_minkey ( heap );
+ return fh_minkey ( heap );
}
// Replaces the key for a given node
-Coordinate replaceKeyInHeap ( FibHeap * heap, FibHeapNode * node, Coordinate newKey )
+Coordinate replaceKeyInHeap ( FibHeap *heap, FibHeapNode *node, Coordinate newKey )
{
- return fh_replacekey ( heap, node, newKey );
+ return fh_replacekey ( heap, node, newKey );
}
// Removes the node with the shortest key, then returns it.
-unsigned int removeNextNodeFromHeap ( FibHeap * heap )
+unsigned int removeNextNodeFromHeap ( FibHeap *heap )
{
- return ( unsigned int ) fh_extractmin ( heap );
+ return ( unsigned int ) fh_extractmin ( heap );
}
-boolean IsHeapEmpty ( FibHeap * heap )
+boolean IsHeapEmpty ( FibHeap *heap )
{
- return fh_isempty ( heap );
+ return fh_isempty ( heap );
}
// Destructor
-void destroyHeap ( FibHeap * heap )
+void destroyHeap ( FibHeap *heap )
{
- fh_deleteheap ( heap );
+ fh_deleteheap ( heap );
}
// Replace the node pointed to by a heap node
-void replaceValueInHeap ( FibHeapNode * node, unsigned int newValue )
+void replaceValueInHeap ( FibHeapNode *node, unsigned int newValue )
{
- fh_replacedata ( node, newValue );
+ fh_replacedata ( node, newValue );
}
// Remove unwanted node
-void destroyNodeInHeap ( FibHeapNode * node, FibHeap * heap )
+void destroyNodeInHeap ( FibHeapNode *node, FibHeap *heap )
{
- fh_delete ( heap, node );
+ fh_delete ( heap, node );
}
diff --git a/standardPregraph/hashFunction.c b/standardPregraph/hashFunction.c
index bc7065d..84cf8f9 100644
--- a/standardPregraph/hashFunction.c
+++ b/standardPregraph/hashFunction.c
@@ -1,7 +1,7 @@
/*
* hashFunction.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,58 +27,58 @@
static int crc_table[256] =
{
- 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
- 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
- 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
- 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
- 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
- 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
- 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
- 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
- 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
- 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
- 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
- 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
- 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
- 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
- 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
- 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
- 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
- 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
- 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
- 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
- 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
- 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
- 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
- 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
- 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
- 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
- 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
- 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
- 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
- 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
- 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
- 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
- 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
- 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
- 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
- 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
- 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
- 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
- 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
- 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
- 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
- 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
- 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
- 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
- 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
- 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
- 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
- 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
- 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
- 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
- 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
- 0x2d02ef8d
+ 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419,
+ 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4,
+ 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07,
+ 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
+ 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856,
+ 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
+ 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4,
+ 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
+ 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3,
+ 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a,
+ 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599,
+ 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
+ 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190,
+ 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f,
+ 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e,
+ 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
+ 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed,
+ 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
+ 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3,
+ 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
+ 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a,
+ 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5,
+ 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010,
+ 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
+ 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17,
+ 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6,
+ 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615,
+ 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
+ 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344,
+ 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
+ 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a,
+ 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
+ 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1,
+ 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c,
+ 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef,
+ 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
+ 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe,
+ 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31,
+ 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c,
+ 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
+ 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b,
+ 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
+ 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1,
+ 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
+ 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278,
+ 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7,
+ 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66,
+ 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
+ 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605,
+ 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8,
+ 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b,
+ 0x2d02ef8d
};
typedef int ( *CRC32CFunctionPtr ) ( uint32_t, const char *, size_t );
@@ -86,70 +86,74 @@ static CRC32CFunctionPtr crc32;
static uint32_t cpuid ( uint32_t functionInput )
{
- uint32_t eax;
- uint32_t ebx;
- uint32_t ecx;
- uint32_t edx;
+ uint32_t eax;
+ uint32_t ebx;
+ uint32_t ecx;
+ uint32_t edx;
#ifdef __PIC__
- asm ( "pushl %%ebx\n\t" /* save %ebx */
- "cpuid\n\t"
- "movl %%ebx, %[ebx]\n\t" /* save what cpuid just put in %ebx */
- "popl %%ebx" : "=a" ( eax ), [ebx] "=r" ( ebx ), "=c" ( ecx ), "=d" ( edx ) : "a" ( functionInput )
- : "cc" );
+ asm ( "pushl %%ebx\n\t" /* save %ebx */
+ "cpuid\n\t"
+ "movl %%ebx, %[ebx]\n\t" /* save what cpuid just put in %ebx */
+ "popl %%ebx" : "=a" ( eax ), [ebx] "=r" ( ebx ), "=c" ( ecx ), "=d" ( edx ) : "a" ( functionInput )
+ : "cc" );
#else
- asm ( "cpuid" : "=a" ( eax ), "=b" ( ebx ), "=c" ( ecx ), "=d" ( edx ) : "a" ( functionInput ) );
+ asm ( "cpuid" : "=a" ( eax ), "=b" ( ebx ), "=c" ( ecx ), "=d" ( edx ) : "a" ( functionInput ) );
#endif
- return ecx;
+ return ecx;
}
-static inline int crc32cHardware64 ( uint32_t crc, const char * data, size_t length )
+static inline int crc32cHardware64 ( uint32_t crc, const char *data, size_t length )
{
- size_t i;
+ size_t i;
- for ( i = 0; i < ( length / sizeof ( uint64_t ) ); ++i )
- {
- crc = __builtin_ia32_crc32di ( crc, * ( uint64_t * ) data );
- data += sizeof ( uint64_t );
- }
+ for ( i = 0; i < ( length / sizeof ( uint64_t ) ); ++i )
+ {
+ crc = __builtin_ia32_crc32di ( crc, * ( uint64_t * ) data );
+ data += sizeof ( uint64_t );
+ }
- return ( int ) crc;
+ return ( int ) crc;
}
-static inline int crc32cTable ( uint32_t crc, const char * buf, size_t len )
+static inline int crc32cTable ( uint32_t crc, const char *buf, size_t len )
{
- while ( len-- )
- {
- crc = crc_table[ ( ( int ) crc ^ ( *buf++ ) ) & 0xff] ^ ( crc >> 8 );
- }
+ while ( len-- )
+ {
+ crc = crc_table[ ( ( int ) crc ^ ( *buf++ ) ) & 0xff] ^ ( crc >> 8 );
+ }
- return crc ^ 0xffffffff;
+ return crc ^ 0xffffffff;
}
CRC32CFunctionPtr detectBestCRC32C()
{
- static const int SSE42_BIT = 20;
- uint32_t ecx = cpuid ( 1 );
- bool hasSSE42 = ecx & ( 1 << SSE42_BIT );
+ static const int SSE42_BIT = 20;
+ uint32_t ecx = cpuid ( 1 );
+ bool hasSSE42 = ecx & ( 1 << SSE42_BIT );
- if ( hasSSE42 )
- {
- //fprintf(stderr, "SSE4.2 enabled\n");
- return crc32cTable;
- }
- else
- {
- //fprintf(stderr, "SSE4.2 disabled\n");
- return crc32cTable;
- }
+ if ( hasSSE42 )
+ {
+#if defined __SSE4_2__
+ fprintf(stderr, "Using Hardware Accelerated CRC32c\n");
+ return crc32cHardware64;
+#else
+ return crc32cTable;
+#endif
+ }
+ else
+ {
+ //fprintf(stderr, "SSE4.2 disabled\n");
+ return crc32cTable;
+ }
}
ubyte8 hash_kmer ( Kmer kmer )
{
- return ( crc32 ( 0, ( char * ) &kmer, sizeof ( Kmer ) ) );
+ return ( crc32 ( 0, ( char * ) &kmer, sizeof ( Kmer ) ) );
}
void crc32c_Init()
{
- crc32 = detectBestCRC32C();
+ crc32 = detectBestCRC32C();
}
diff --git a/standardPregraph/inc/check.h b/standardPregraph/inc/check.h
index 278620a..2bf7a16 100644
--- a/standardPregraph/inc/check.h
+++ b/standardPregraph/inc/check.h
@@ -1,7 +1,7 @@
/*
* inc/check.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -21,7 +21,7 @@
*/
-extern void * ckalloc ( unsigned long long amount );
-extern void * ckrealloc ( void * p, size_t new_size, size_t old_size );
-extern FILE * ckopen ( char * name, char * mode );
+extern void *ckalloc ( unsigned long long amount );
+extern void *ckrealloc ( void *p, size_t new_size, size_t old_size );
+extern FILE *ckopen ( char *name, char *mode );
diff --git a/standardPregraph/inc/darray.h b/standardPregraph/inc/darray.h
index a3a4a35..364df49 100644
--- a/standardPregraph/inc/darray.h
+++ b/standardPregraph/inc/darray.h
@@ -1,7 +1,7 @@
/*
* inc/darray.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,17 +29,17 @@
typedef struct dynamic_array
{
- void * array;
- long long array_size;
- size_t item_size;
- long long item_c;
+ void *array;
+ long long array_size;
+ size_t item_size;
+ long long item_c;
} DARRAY;
-void * darrayPut ( DARRAY * darray, long long index );
-void * darrayGet ( DARRAY * darray, long long index );
-DARRAY * createDarray ( int num_items, size_t unit_size );
-void freeDarray ( DARRAY * darray );
-void emptyDarray ( DARRAY * darray );
+void *darrayPut ( DARRAY *darray, long long index );
+void *darrayGet ( DARRAY *darray, long long index );
+DARRAY *createDarray ( int num_items, size_t unit_size );
+void freeDarray ( DARRAY *darray );
+void emptyDarray ( DARRAY *darray );
#endif
diff --git a/standardPregraph/inc/def.h b/standardPregraph/inc/def.h
index c4ba677..f5819f0 100644
--- a/standardPregraph/inc/def.h
+++ b/standardPregraph/inc/def.h
@@ -1,7 +1,7 @@
/*
* inc/def.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -46,71 +46,71 @@ int b_ban;
#ifdef MER127
typedef struct kmer
{
- unsigned long long high1, low1, high2, low2;
+ unsigned long long high1, low1, high2, low2;
} Kmer;
#else
typedef struct kmer
{
- unsigned long long high, low;
+ unsigned long long high, low;
} Kmer;
#endif
typedef struct preedge
{
- Kmer from_node;
- Kmer to_node;
- char * seq;
- int length;
- unsigned short cvg: 14;
- unsigned bal_edge: 2; //indicate whether it's bal_edge is the previous edge, next edge or itself
+ Kmer from_node;
+ Kmer to_node;
+ char *seq;
+ int length;
+ unsigned short cvg: 14;
+ unsigned bal_edge: 2; //indicate whether it's bal_edge is the previous edge, next edge or itself
} preEDGE;
typedef struct readinterval //record two paths of bubble
{
- int readid;
- unsigned int edgeid;
- int start;
- struct readinterval * bal_rv;
- struct readinterval * nextOnEdge; // the downstream in the path
- struct readinterval * prevOnEdge; // the upstream in the path
- struct readinterval * nextInRead;
- struct readinterval * prevInRead;
+ int readid;
+ unsigned int edgeid;
+ int start;
+ struct readinterval *bal_rv;
+ struct readinterval *nextOnEdge; // the downstream in the path
+ struct readinterval *prevOnEdge; // the upstream in the path
+ struct readinterval *nextInRead;
+ struct readinterval *prevInRead;
} READINTERVAL;
struct arc;
typedef struct edge
{
- unsigned int from_vt; //from kmer id
- unsigned int to_vt; //to kmer id
- int length; //edge length
- unsigned short cvg: 14; //coverage
- unsigned short bal_edge: 2; // 2:smaller 0:larger 1:rev-com equal to itself
- unsigned short multi: 14;
- unsigned short deleted : 1;
- unsigned short flag : 1;
- char * seq; //edge content
- READINTERVAL * rv;
- struct arc * arcs;
- long long * markers; //reads id
+ unsigned int from_vt; //from kmer id
+ unsigned int to_vt; //to kmer id
+ int length; //edge length
+ unsigned short cvg: 14; //coverage
+ unsigned short bal_edge: 2; // 2:smaller 0:larger 1:rev-com equal to itself
+ unsigned short multi: 14;
+ unsigned short deleted : 1;
+ unsigned short flag : 1;
+ char *seq; //edge content
+ READINTERVAL *rv;
+ struct arc *arcs;
+ long long *markers; //reads id
} EDGE;
typedef struct edge_sub
{
- unsigned int from_vt; //from kmer id
- unsigned int to_vt; //to kmer id
- int length; //edge length
- char * seq; //edge content
+ unsigned int from_vt; //from kmer id
+ unsigned int to_vt; //to kmer id
+ int length; //edge length
+ char *seq; //edge content
} EDGE_SUB;
typedef struct edge_pt
{
- EDGE * edge;
- struct edge_pt * next;
+ EDGE *edge;
+ struct edge_pt *next;
} EDGE_PT;
typedef struct vertex
{
- Kmer kmer;
+ Kmer kmer;
} VERTEX;
/*
typedef struct connection
@@ -141,37 +141,37 @@ typedef struct connection
*/
typedef struct connection
{
- unsigned int contigID;
- int gapLen;
-
- unsigned short maxGap;
- unsigned char minGap;
- unsigned char bySmall: 1;
- unsigned char weakPoint: 1;
- unsigned char smallIns: 1;
- unsigned char newIns: 1;
-
- unsigned char weightNotInherit;
- unsigned char weight;
- unsigned char maxSingleWeight;
- unsigned char mask : 1;
- unsigned char used : 1;
- unsigned char weak : 1;
- unsigned char deleted : 1;
- unsigned char prevInScaf : 1;
- unsigned char inherit : 1;
- unsigned char checking : 1;
- unsigned char singleInScaf : 1;
- struct connection * nextInScaf;
- struct connection * next;
- struct connection * nextInLookupTable;
+ unsigned int contigID;
+ int gapLen;
+
+ unsigned short maxGap;
+ unsigned char minGap;
+ unsigned char bySmall: 1;
+ unsigned char weakPoint: 1;
+ unsigned char smallIns: 1;
+ unsigned char newIns: 1;
+
+ unsigned char weightNotInherit;
+ unsigned char weight;
+ unsigned char maxSingleWeight;
+ unsigned char mask : 1;
+ unsigned char used : 1;
+ unsigned char weak : 1;
+ unsigned char deleted : 1;
+ unsigned char prevInScaf : 1;
+ unsigned char inherit : 1;
+ unsigned char checking : 1;
+ unsigned char singleInScaf : 1;
+ struct connection *nextInScaf;
+ struct connection *next;
+ struct connection *nextInLookupTable;
} CONNECT;
typedef struct prearc
{
- unsigned int to_ed; // the destination edge of prearc
- unsigned int multiplicity;
- struct prearc * next;
+ unsigned int to_ed; // the destination edge of prearc
+ unsigned int multiplicity;
+ struct prearc *next;
} preARC;
/*
typedef struct contig
@@ -195,161 +195,161 @@ typedef struct contig
*/
typedef struct contig
{
- unsigned int from_vt; // the first kmer of the contig
- unsigned int to_vt; // the last kmer of the contig
- unsigned int length;
- unsigned short indexInScaf; // the index in the scaffold
- unsigned char cvg;
- unsigned char bal_edge: 2; // 0, 1 or 2
- unsigned char mask : 1;
- unsigned char flag : 1;
- unsigned char multi: 1;
- unsigned char inSubGraph: 1;
- unsigned char bubbleInScaff: 1;
- char * seq;
- CONNECT * downwardConnect; // record the links to other contigs
- preARC * arcs;
- STACK * closeReads;
+ unsigned int from_vt; // the first kmer of the contig
+ unsigned int to_vt; // the last kmer of the contig
+ unsigned int length;
+ unsigned short indexInScaf; // the index in the scaffold
+ unsigned char cvg;
+ unsigned char bal_edge: 2; // 0, 1 or 2
+ unsigned char mask : 1;
+ unsigned char flag : 1;
+ unsigned char multi: 1;
+ unsigned char inSubGraph: 1;
+ unsigned char bubbleInScaff: 1;
+ char *seq;
+ CONNECT *downwardConnect; // record the links to other contigs
+ preARC *arcs;
+ STACK *closeReads;
} CONTIG;
typedef struct read_nearby
{
- int len;
- int dis; // dis to nearby contig or scaffold's start position
- long long seqStarter; //sequence start position in dynamic array
+ int len;
+ int dis; // dis to nearby contig or scaffold's start position
+ long long seqStarter; //sequence start position in dynamic array
} READNEARBY;
typedef struct annotation
{
- unsigned long long readID;
- unsigned int contigID;
- int pos;
+ unsigned long long readID;
+ unsigned int contigID;
+ int pos;
} ANNOTATION;
typedef struct parameter
{
- unsigned char threadID;
- void ** hash_table;
- unsigned char * mainSignal;
- unsigned char * selfSignal;
+ unsigned char threadID;
+ void **hash_table;
+ unsigned char *mainSignal;
+ unsigned char *selfSignal;
} PARAMETER;
typedef struct lightannot
{
- int contigID;
- int pos;
+ int contigID;
+ int pos;
} LIGHTANNOT;
typedef struct edgepatch
{
- Kmer from_kmer, to_kmer;
- unsigned int length;
- char bal_edge;
+ Kmer from_kmer, to_kmer;
+ unsigned int length;
+ char bal_edge;
} EDGEPATCH;
typedef struct lightctg
{
- unsigned int index;
- int length;
- char * seq;
+ unsigned int index;
+ int length;
+ char *seq;
} LIGHTCTG;
typedef struct arc
{
- unsigned int to_ed;
- unsigned int multiplicity;
- struct arc * prev;
- struct arc * next;
- struct arc * bal_arc;
- struct arc * nextInLookupTable;
+ unsigned int to_ed;
+ unsigned int multiplicity;
+ struct arc *prev;
+ struct arc *next;
+ struct arc *bal_arc;
+ struct arc *nextInLookupTable;
} ARC;
typedef struct arcexist
{
- Kmer kmer;
- struct arcexist * left;
- struct arcexist * right;
+ Kmer kmer;
+ struct arcexist *left;
+ struct arcexist *right;
} ARCEXIST;
typedef struct lib_info
{
- int min_ins;
- int max_ins;
- int avg_ins;
- int rd_len_cutoff; //read length cutoff
- int reverse;
- int asm_flag;
- int map_len;
- int pair_num_cut;
- int rank;
- //indicate which file is next to be read
- int curr_type;
- int curr_index;
-
- //file handlers to opened files
- FILE * fp1;
- FILE * fp2;
- boolean f1_start;
- boolean f2_start;
- //whether last read is read1 in pair
- int paired; // 0 -- single; 1 -- read1; 2 -- read2;
-
- //type1
- char ** a1_fname;
- char ** a2_fname;
- int num_a1_file;
- int num_a2_file;
-
- //type2
- char ** q1_fname;
- char ** q2_fname;
- int num_q1_file;
- int num_q2_file;
-
- //type3
- char ** p_fname;
- int num_p_file; //fasta only
-
- //type4 &5
- char ** s_a_fname;
- int num_s_a_file;
- char ** s_q_fname;
- int num_s_q_file;
-
- samfile_t * fp3; //the file handle to read bam file
- char ** b_fname; //the name of the bam file
- int num_b_file; //the number of the bam file
+ int min_ins;
+ int max_ins;
+ int avg_ins;
+ int rd_len_cutoff; //read length cutoff
+ int reverse;
+ int asm_flag;
+ int map_len;
+ int pair_num_cut;
+ int rank;
+ //indicate which file is next to be read
+ int curr_type;
+ int curr_index;
+
+ //file handlers to opened files
+ FILE *fp1;
+ FILE *fp2;
+ boolean f1_start;
+ boolean f2_start;
+ //whether last read is read1 in pair
+ int paired; // 0 -- single; 1 -- read1; 2 -- read2;
+
+ //type1
+ char **a1_fname;
+ char **a2_fname;
+ int num_a1_file;
+ int num_a2_file;
+
+ //type2
+ char **q1_fname;
+ char **q2_fname;
+ int num_q1_file;
+ int num_q2_file;
+
+ //type3
+ char **p_fname;
+ int num_p_file; //fasta only
+
+ //type4 &5
+ char **s_a_fname;
+ int num_s_a_file;
+ char **s_q_fname;
+ int num_s_q_file;
+
+ samfile_t *fp3; //the file handle to read bam file
+ char **b_fname; //the name of the bam file
+ int num_b_file; //the number of the bam file
} LIB_INFO;
typedef struct ctg4heap
{
- unsigned int ctgID;
- int dis;
- unsigned char ds_shut4dheap: 1; // ignore downstream connections
- unsigned char us_shut4dheap: 1; // ignore upstream connections
- unsigned char ds_shut4uheap: 1; // ignore downstream connections
- unsigned char us_shut4uheap: 1; // ignore upstream connections
+ unsigned int ctgID;
+ int dis;
+ unsigned char ds_shut4dheap: 1; // ignore downstream connections
+ unsigned char us_shut4dheap: 1; // ignore upstream connections
+ unsigned char ds_shut4uheap: 1; // ignore downstream connections
+ unsigned char us_shut4uheap: 1; // ignore upstream connections
} CTGinHEAP;
typedef struct ctg4scaf
{
- unsigned int ctgID;
- int start;
- int end; //position in scaff
- unsigned int cutHead : 8;
- unsigned int cutTail : 7;
- unsigned int scaftig_start : 1; //is it a scaftig starter
- unsigned int mask : 1; // is it masked for further operations
- unsigned int gapSeqLen: 15;
- int gapSeqOffset;
+ unsigned int ctgID;
+ int start;
+ int end; //position in scaff
+ unsigned int cutHead : 8;
+ unsigned int cutTail : 7;
+ unsigned int scaftig_start : 1; //is it a scaftig starter
+ unsigned int mask : 1; // is it masked for further operations
+ unsigned int gapSeqLen: 15;
+ int gapSeqOffset;
} CTGinSCAF;
typedef struct pe_info
{
- int insertS;
- long long PE_bound;
- int rank;
- int pair_num_cut;
+ int insertS;
+ long long PE_bound;
+ int rank;
+ int pair_num_cut;
} PE_INFO;
#endif
diff --git a/standardPregraph/inc/def2.h b/standardPregraph/inc/def2.h
index ead7cfd..d0a5f5d 100644
--- a/standardPregraph/inc/def2.h
+++ b/standardPregraph/inc/def2.h
@@ -1,7 +1,7 @@
/*
* inc/def2.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -34,33 +34,33 @@ typedef struct dfibheap_el DFibHeapNode;
//Memory manager
typedef struct block_start
{
- struct block_start * next;
+ struct block_start *next;
} BLOCK_START;
typedef struct recycle_mark
{
- struct recycle_mark * next;
+ struct recycle_mark *next;
} RECYCLE_MARK;
typedef struct mem_manager
{
- BLOCK_START * block_list;
- int index_in_block;
- int items_per_block;
- size_t item_size;
- RECYCLE_MARK * recycle_list;
- unsigned long long counter;
+ BLOCK_START *block_list;
+ int index_in_block;
+ int items_per_block;
+ size_t item_size;
+ RECYCLE_MARK *recycle_list;
+ unsigned long long counter;
} MEM_MANAGER;
struct dfibheap_el
{
- int dfhe_degree;
- boolean dfhe_mark;
- DFibHeapNode * dfhe_p;
- DFibHeapNode * dfhe_child;
- DFibHeapNode * dfhe_left;
- DFibHeapNode * dfhe_right;
- Time dfhe_key;
- unsigned int dfhe_data;//void *dfhe_data;
+ int dfhe_degree;
+ boolean dfhe_mark;
+ DFibHeapNode *dfhe_p;
+ DFibHeapNode *dfhe_child;
+ DFibHeapNode *dfhe_left;
+ DFibHeapNode *dfhe_right;
+ Time dfhe_key;
+ unsigned int dfhe_data;//void *dfhe_data;
};
#endif
diff --git a/standardPregraph/inc/dfib.h b/standardPregraph/inc/dfib.h
index d0ad90a..c4e8f0a 100644
--- a/standardPregraph/inc/dfib.h
+++ b/standardPregraph/inc/dfib.h
@@ -41,8 +41,8 @@
#include "def2.h" //#include "globals.h"
/* functions for key heaps */
-DFibHeap * dfh_makekeyheap ( void );
-DFibHeapNode * dfh_insertkey ( DFibHeap *, Time, unsigned int );
+DFibHeap *dfh_makekeyheap ( void );
+DFibHeapNode *dfh_insertkey ( DFibHeap *, Time, unsigned int );
Time dfh_replacekey ( DFibHeap *, DFibHeapNode *, Time );
unsigned int dfh_replacekeydata ( DFibHeap *, DFibHeapNode *, Time, unsigned int );
diff --git a/standardPregraph/inc/dfibHeap.h b/standardPregraph/inc/dfibHeap.h
index 53b6ff5..ee419e3 100644
--- a/standardPregraph/inc/dfibHeap.h
+++ b/standardPregraph/inc/dfibHeap.h
@@ -1,7 +1,7 @@
/*
* inc/dfibHeap.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -23,23 +23,23 @@
#ifndef _DFIBHEAP_H_
#define _DFIBHEAP_H_
-DFibHeap * newDFibHeap();
+DFibHeap *newDFibHeap();
-DFibHeapNode * insertNodeIntoDHeap ( DFibHeap * heap, Time key, unsigned int node );
+DFibHeapNode *insertNodeIntoDHeap ( DFibHeap *heap, Time key, unsigned int node );
-Time replaceKeyInDHeap ( DFibHeap * heap, DFibHeapNode * node, Time newKey );
+Time replaceKeyInDHeap ( DFibHeap *heap, DFibHeapNode *node, Time newKey );
-unsigned int removeNextNodeFromDHeap ( DFibHeap * heap );
+unsigned int removeNextNodeFromDHeap ( DFibHeap *heap );
-void destroyDHeap ( DFibHeap * heap );
+void destroyDHeap ( DFibHeap *heap );
-boolean HasMin ( DFibHeap * h );
+boolean HasMin ( DFibHeap *h );
-void replaceValueInDHeap ( DFibHeapNode * node, unsigned int newValue );
+void replaceValueInDHeap ( DFibHeapNode *node, unsigned int newValue );
-void * destroyNodeInDHeap ( DFibHeapNode * node, DFibHeap * heap );
+void *destroyNodeInDHeap ( DFibHeapNode *node, DFibHeap *heap );
-IDnum getDFibHeapSize ( DFibHeap * heap );
+IDnum getDFibHeapSize ( DFibHeap *heap );
-Time getKey ( DFibHeapNode * node );
+Time getKey ( DFibHeapNode *node );
#endif
diff --git a/standardPregraph/inc/dfibpriv.h b/standardPregraph/inc/dfibpriv.h
index ed63dc1..f3011b9 100644
--- a/standardPregraph/inc/dfibpriv.h
+++ b/standardPregraph/inc/dfibpriv.h
@@ -44,36 +44,36 @@
* specific node operations
*/
-static DFibHeapNode * dfhe_newelem ( DFibHeap * );
-static void dfhe_insertafter ( DFibHeapNode * a, DFibHeapNode * b );
-static inline void dfhe_insertbefore ( DFibHeapNode * a, DFibHeapNode * b );
-static DFibHeapNode * dfhe_remove ( DFibHeapNode * a );
+static DFibHeapNode *dfhe_newelem ( DFibHeap * );
+static void dfhe_insertafter ( DFibHeapNode *a, DFibHeapNode *b );
+static inline void dfhe_insertbefore ( DFibHeapNode *a, DFibHeapNode *b );
+static DFibHeapNode *dfhe_remove ( DFibHeapNode *a );
/*
* global heap operations
*/
struct dfibheap
{
- MEM_MANAGER * nodeMemory;
- IDnum dfh_n;
- IDnum dfh_Dl;
- DFibHeapNode ** dfh_cons;
- DFibHeapNode * dfh_min;
- DFibHeapNode * dfh_root;
+ MEM_MANAGER *nodeMemory;
+ IDnum dfh_n;
+ IDnum dfh_Dl;
+ DFibHeapNode **dfh_cons;
+ DFibHeapNode *dfh_min;
+ DFibHeapNode *dfh_root;
};
static void dfh_insertrootlist ( DFibHeap *, DFibHeapNode * );
static void dfh_removerootlist ( DFibHeap *, DFibHeapNode * );
static void dfh_consolidate ( DFibHeap * );
-static void dfh_heaplink ( DFibHeap * h, DFibHeapNode * y, DFibHeapNode * x );
+static void dfh_heaplink ( DFibHeap *h, DFibHeapNode *y, DFibHeapNode *x );
static void dfh_cut ( DFibHeap *, DFibHeapNode *, DFibHeapNode * );
static void dfh_cascading_cut ( DFibHeap *, DFibHeapNode * );
-static DFibHeapNode * dfh_extractminel ( DFibHeap * );
-static void dfh_checkcons ( DFibHeap * h );
-static int dfh_compare ( DFibHeap * h, DFibHeapNode * a, DFibHeapNode * b );
-static int dfh_comparedata ( DFibHeap * h, Time key,
- unsigned int data, DFibHeapNode * b );
-static void dfh_insertel ( DFibHeap * h, DFibHeapNode * x );
+static DFibHeapNode *dfh_extractminel ( DFibHeap * );
+static void dfh_checkcons ( DFibHeap *h );
+static int dfh_compare ( DFibHeap *h, DFibHeapNode *a, DFibHeapNode *b );
+static int dfh_comparedata ( DFibHeap *h, Time key,
+ unsigned int data, DFibHeapNode *b );
+static void dfh_insertel ( DFibHeap *h, DFibHeapNode *x );
/*
diff --git a/standardPregraph/inc/extfunc.h b/standardPregraph/inc/extfunc.h
index b687787..628573e 100644
--- a/standardPregraph/inc/extfunc.h
+++ b/standardPregraph/inc/extfunc.h
@@ -1,7 +1,7 @@
/*
* inc/extfunc.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -23,95 +23,95 @@
#include "check.h"
#include "extfunc2.h"
-extern void initAIO ( struct aiocb * aio, char * buf, int fd, int size );
-extern int AIORead ( struct aiocb * mycb, int * offset, char * buf, char * cach, int * rt, int curr_type );
-extern boolean check_file ( char * name ); //add 2012.7.6
-extern boolean checkFiles4Scaff ( char * infile );
+extern void initAIO ( struct aiocb *aio, char *buf, int fd, int size );
+extern int AIORead ( struct aiocb *mycb, int *offset, char *buf, char *cach, int *rt, int curr_type );
+extern boolean check_file ( char *name ); //add 2012.7.6
+extern boolean checkFiles4Scaff ( char *infile );
-extern boolean openNextFile ( int * libNo, boolean pairs, unsigned char asm_ctg );
+extern boolean openNextFile ( int *libNo, boolean pairs, unsigned char asm_ctg );
extern int nextValidIndex ( int libNo, boolean pair, unsigned char asm_ctg );
extern void openFileInLib ( int libNo );
extern void closeFp1InLab ( int libNo );
extern void closeFp2InLab ( int libNo );
-extern boolean readseqInLib ( char * src_seq, char * src_name, int * len_seq, char * buf, int * start, int offset, int i );
+extern boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, int *start, int offset, int i );
-extern void readseq1by1 ( char * src_seq, char * src_name, int * len_seq, FILE * fp, long long num_seq );
-extern void readseqPbyP ( char * src_seq, char * src_name, int * insertS, int * len_seq, FILE * fp, long long num_seq );
-extern long long readseqpar ( int * max_len, int * min_leg, int * max_name_len, FILE * fp );
-extern void free_edge_list ( EDGE_PT * el );
-extern void reverseComplementSeq ( char * seq, int len, char * bal_seq );
-extern void free_edge_array ( EDGE * ed_array, int ed_num );
-extern void free_lightctg_array ( LIGHTCTG * ed_array, int ed_num );
-extern char getCharInTightString ( char * tightSeq, int pos );
-extern void writeChar2tightSting ( char nt, char * tightSeq, int pos );
+extern void readseq1by1 ( char *src_seq, char *src_name, int *len_seq, FILE *fp, long long num_seq );
+extern void readseqPbyP ( char *src_seq, char *src_name, int *insertS, int *len_seq, FILE *fp, long long num_seq );
+extern long long readseqpar ( int *max_len, int *min_leg, int *max_name_len, FILE *fp );
+extern void free_edge_list ( EDGE_PT *el );
+extern void reverseComplementSeq ( char *seq, int len, char *bal_seq );
+extern void free_edge_array ( EDGE *ed_array, int ed_num );
+extern void free_lightctg_array ( LIGHTCTG *ed_array, int ed_num );
+extern char getCharInTightString ( char *tightSeq, int pos );
+extern void writeChar2tightSting ( char nt, char *tightSeq, int pos );
extern void short_reads_sum();
-extern void read_one_sequence ( FILE * fp, long long * T, char ** X );
-extern void output_edges ( preEDGE * ed_array, int ed_num, char * outfile );
-extern void loadVertex ( char * graphfile );
-extern void loadEdge ( char * graphfile );
-extern boolean loadPath ( char * graphfile );
-extern READINTERVAL * allocateRV ( int readid, int edgeid );
+extern void read_one_sequence ( FILE *fp, long long *T, char **X );
+extern void output_edges ( preEDGE *ed_array, int ed_num, char *outfile );
+extern void loadVertex ( char *graphfile );
+extern void loadEdge ( char *graphfile );
+extern boolean loadPath ( char *graphfile );
+extern READINTERVAL *allocateRV ( int readid, int edgeid );
extern void createRVmemo();
-extern void dismissRV ( READINTERVAL * rv );
+extern void dismissRV ( READINTERVAL *rv );
extern void destroyReadIntervMem();
extern void destroyConnectMem();
extern void u2uConcatenate();
-extern void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len );
-extern void printTightString ( char * tightSeq, int len );
-extern int roughUniqueness ( unsigned int edgeno, char ignore_cvg, char * ignored );
-extern void outputReadPos ( char * graphfile, int min_len );
+extern void output_contig ( EDGE *ed_array, unsigned int ed_num, char *outfile, int cut_len );
+extern void printTightString ( char *tightSeq, int len );
+extern int roughUniqueness ( unsigned int edgeno, char ignore_cvg, char *ignored );
+extern void outputReadPos ( char *graphfile, int min_len );
extern void testSearch();
extern void allpathConcatenate();
-extern void output_updated_edges ( char * outfile );
-extern void output_updated_vertex ( char * outfile );
-extern void loadUpdatedEdges ( char * graphfile );
-extern void loadUpdatedVertex ( char * graphfile );
-extern void connectByPE ( char * infile );
-extern void output_cntGVZ ( char * outfile );
-extern void output_graph ( char * outfile );
+extern void output_updated_edges ( char *outfile );
+extern void output_updated_vertex ( char *outfile );
+extern void loadUpdatedEdges ( char *graphfile );
+extern void loadUpdatedVertex ( char *graphfile );
+extern void connectByPE ( char *infile );
+extern void output_cntGVZ ( char *outfile );
+extern void output_graph ( char *outfile );
extern void testLinearC2C();
-extern void output_contig_graph ( char * outfile );
-extern void scaffolding ( unsigned int cut_len, char * outfile );
-extern int cmp_int ( const void * a, const void * b );
-extern CONNECT * allocateCN ( unsigned int contigId, int gap );
+extern void output_contig_graph ( char *outfile );
+extern void scaffolding ( unsigned int cut_len, char *outfile );
+extern int cmp_int ( const void *a, const void *b );
+extern CONNECT *allocateCN ( unsigned int contigId, int gap );
extern int recoverRep();
-extern void loadPEgrads ( char * infile );
-extern int putInsertS ( long long readid, int size, int * currGrads );
-extern int getInsertS ( long long readid, int * readlen );
-extern int connectByPE_grad ( FILE * fp, int peGrad, char * line );
-extern int connectByPE_grad_gz ( gzFile * fp, int peGrad, char * line );
-extern void PEgradsScaf ( char * infile );
-extern void reorderAnnotation ( char * infile, char * outfile );
-extern void output_1edge ( preEDGE * edge, gzFile * fp );
-extern void prlRead2edge ( char * libfile, char * outfile );
-extern void annotFileTrans ( char * infile, char * outfile );
-extern void prlLoadPath ( char * graphfile );
-extern void misCheck ( char * infile, char * outfile );
-extern int uniqueLenSearch ( unsigned int * len_array, unsigned int * flag_array, int num, unsigned int target );
-extern int cmp_vertex ( const void * a, const void * b );
+extern void loadPEgrads ( char *infile );
+extern int putInsertS ( long long readid, int size, int *currGrads );
+extern int getInsertS ( long long readid, int *readlen );
+extern int connectByPE_grad ( FILE *fp, int peGrad, char *line );
+extern int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line );
+extern void PEgradsScaf ( char *infile );
+extern void reorderAnnotation ( char *infile, char *outfile );
+extern void output_1edge ( preEDGE *edge, gzFile *fp );
+extern void prlRead2edge ( char *libfile, char *outfile );
+extern void annotFileTrans ( char *infile, char *outfile );
+extern void prlLoadPath ( char *graphfile );
+extern void misCheck ( char *infile, char *outfile );
+extern int uniqueLenSearch ( unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target );
+extern int cmp_vertex ( const void *a, const void *b );
extern void linkContig2Vts();
-extern int connectByPE_gradPatch ( FILE * fp1, FILE * fp2, int peGrad, char * line1, char * line2 );
-extern void scaftiging ( char * graphfile, int len_cut );
-extern void gapFilling ( char * graphfile, int cut_len );
-extern ARC * getArcBetween ( unsigned int from_ed, unsigned int to_ed );
-extern void bubblePinch ( double simiCutoff, char * outfile, int M, boolean isIter, boolean last );
+extern int connectByPE_gradPatch ( FILE *fp1, FILE *fp2, int peGrad, char *line1, char *line2 );
+extern void scaftiging ( char *graphfile, int len_cut );
+extern void gapFilling ( char *graphfile, int cut_len );
+extern ARC *getArcBetween ( unsigned int from_ed, unsigned int to_ed );
+extern void bubblePinch ( double simiCutoff, char *outfile, int M, boolean isIter, boolean last );
extern void linearConcatenate ( boolean isIter, boolean last );
extern unsigned char setArcMulti ( unsigned int from_ed, unsigned int to_ed, unsigned char value );
-extern ARC * allocateArc ( unsigned int edgeid );
+extern ARC *allocateArc ( unsigned int edgeid );
extern void cutTipsInGraph ( int cutLen, boolean strict, boolean last );
-extern ARC * deleteArc ( ARC * arc_list, ARC * arc );
+extern ARC *deleteArc ( ARC *arc_list, ARC *arc );
extern void compactEdgeArray();
-extern void dismissArc ( ARC * arc );
+extern void dismissArc ( ARC *arc );
extern void createArcMemo();
-extern ARC * getArcBetween ( unsigned int from_ed, unsigned int to_ed );
-extern ARC * allocateArc ( unsigned int edgeid );
-extern void writeChar2tightString ( char nt, char * tightSeq, int pos );
-extern void output_heavyArcs ( char * outfile );
-extern preARC * allocatePreArc ( unsigned int edgeid );
+extern ARC *getArcBetween ( unsigned int from_ed, unsigned int to_ed );
+extern ARC *allocateArc ( unsigned int edgeid );
+extern void writeChar2tightString ( char nt, char *tightSeq, int pos );
+extern void output_heavyArcs ( char *outfile );
+extern preARC *allocatePreArc ( unsigned int edgeid );
extern void destroyPreArcMem();
-extern void traceAlongArc ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int * num_route );
+extern void traceAlongArc ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route );
extern void freeContig_array();
-extern void output_scafSeq ( char * graphfile, int len_cut );
+extern void output_scafSeq ( char *graphfile, int len_cut );
extern void putArcInHash ( unsigned int from_ed, unsigned int to_ed );
extern boolean DoesArcExist ( unsigned int from_ed, unsigned int to_ed );
extern void recordArcInHash();
@@ -119,65 +119,65 @@ extern void destroyArcHash();
extern void removeWeakEdges ( int lenCutoff, unsigned int multiCutoff );
extern void createArcLookupTable();
extern void deleteArcLookupTable();
-extern void putArc2LookupTable ( unsigned int from_ed, ARC * arc );
+extern void putArc2LookupTable ( unsigned int from_ed, ARC *arc );
extern void removeArcInLookupTable ( unsigned int from_ed, unsigned int to_ed );
-extern ARC * arcCount ( unsigned int edgeid, unsigned int * num );
-extern void mapFileTrans ( char * infile );
+extern ARC *arcCount ( unsigned int edgeid, unsigned int *num );
+extern void mapFileTrans ( char *infile );
extern void solveReps();
extern void removeDeadArcs();
extern void destroyArcMem();
-extern void getCntsInFile ( char * infile );
-extern void scafByCntInfo ( char * infile );
-extern CONNECT * add1Connect ( unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit );
-extern void getScaff ( char * infile );
-extern void traceAlongMaskedCnt ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int * num_route );
+extern void getCntsInFile ( char *infile );
+extern void scafByCntInfo ( char *infile );
+extern CONNECT *add1Connect ( unsigned int e1, unsigned int e2, int gap, int weight, boolean inherit );
+extern void getScaff ( char *infile );
+extern void traceAlongMaskedCnt ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route );
extern void createPreArcMemManager();
-extern boolean loadPathBin ( char * graphfile );
+extern boolean loadPathBin ( char *graphfile );
extern void recordArcsInLookupTable();
-extern FILE * multiFileRead1seq ( char * src_seq, char * src_name, int * len_seq, FILE * fp, FILE * freads );
-extern void multiFileSeqpar ( FILE * fp );
-extern long long multiFileParse ( int * max_leg, int * min_leg, int * max_name_leg, FILE * fp );
-extern CONNECT * getCntBetween ( unsigned int from_ed, unsigned int to_ed );
+extern FILE *multiFileRead1seq ( char *src_seq, char *src_name, int *len_seq, FILE *fp, FILE *freads );
+extern void multiFileSeqpar ( FILE *fp );
+extern long long multiFileParse ( int *max_leg, int *min_leg, int *max_name_leg, FILE *fp );
+extern CONNECT *getCntBetween ( unsigned int from_ed, unsigned int to_ed );
extern void createCntMemManager();
extern void destroyConnectMem();
extern void createCntLookupTable();
extern void deleteCntLookupTable();
-extern void putCnt2LookupTable ( unsigned int from_c, CONNECT * cnt );
-extern void prlRead2Ctg ( char * seqfile, char * outfile );
-extern boolean prlContig2nodes ( char * grapfile, int len_cut );
-extern void scan_libInfo ( char * libfile );
+extern void putCnt2LookupTable ( unsigned int from_c, CONNECT *cnt );
+extern void prlRead2Ctg ( char *seqfile, char *outfile );
+extern boolean prlContig2nodes ( char *grapfile, int len_cut );
+extern void scan_libInfo ( char *libfile );
extern void free_libs();
-extern boolean read1seqInLibBam ( char * src_seq, char * src_name, int * len_seq, int * libNo, boolean pair, unsigned char asm_ctg, int * type );
-extern boolean read1seqInLib ( char * src_seq, char * src_name, int * len_seq,
- int * libNo, boolean pair, unsigned char asm_ctg , int * type );
+extern boolean read1seqInLibBam ( char *src_seq, char *src_name, int *len_seq, int *libNo, boolean pair, unsigned char asm_ctg, int *type );
+extern boolean read1seqInLib ( char *src_seq, char *src_name, int *len_seq,
+ int *libNo, boolean pair, unsigned char asm_ctg , int *type );
extern void save4laterSolve();
extern void solveRepsAfter();
extern void free_pe_mem();
extern void alloc_pe_mem ( int gradsCounter );
extern void prlDestroyPreArcMem();
-extern preARC * prlAllocatePreArc ( unsigned int edgeid, MEM_MANAGER * manager );
-extern boolean prlRead2HashTable ( char * libfile, char * outfile );
+extern preARC *prlAllocatePreArc ( unsigned int edgeid, MEM_MANAGER *manager );
+extern boolean prlRead2HashTable ( char *libfile, char *outfile );
extern void free_allSets();
extern void removeSingleTips();
extern void removeMinorTips();
-extern void kmer2edges ( char * outfile );
-extern void output_vertex ( char * outfile );
-extern boolean prlRead2HashTable ( char * libfile, char * outfile );
-extern void Links2Scaf ( char * infile );
-extern void PE2Links ( char * infile );
+extern void kmer2edges ( char *outfile );
+extern void output_vertex ( char *outfile );
+extern boolean prlRead2HashTable ( char *libfile, char *outfile );
+extern void Links2Scaf ( char *infile );
+extern void PE2Links ( char *infile );
extern unsigned int getTwinCtg ( unsigned int ctg );
-extern void basicContigInfo ( char * infile );
+extern void basicContigInfo ( char *infile );
extern boolean isSmallerThanTwin ( unsigned int ctg );
extern boolean isLargerThanTwin ( unsigned int ctg );
extern boolean isSameAsTwin ( unsigned int ctg );
-extern boolean loadMarkerBin ( char * graphfile );
-extern void readsCloseGap ( char * graphfile );
-extern void prlReadsCloseGap ( char * graphfile );
-extern void locateReadOnScaf ( char * graphfile );
+extern boolean loadMarkerBin ( char *graphfile );
+extern void readsCloseGap ( char *graphfile );
+extern void prlReadsCloseGap ( char *graphfile );
+extern void locateReadOnScaf ( char *graphfile );
/*********** Kmer related *************/
extern Kmer createFilter ( int overlaplen );
-extern void printKmerSeq ( FILE * fp, Kmer kmer );
+extern void printKmerSeq ( FILE *fp, Kmer kmer );
//extern U256b Kmer2int256(Kmer seq);
extern boolean KmerLarger ( Kmer kmer1, Kmer kmer2 );
extern boolean KmerSmaller ( Kmer kmer1, Kmer kmer2 );
@@ -193,25 +193,25 @@ extern Kmer KmerRightBitMove ( Kmer word, int dis );
extern Kmer reverseComplement ( Kmer word, int overlap );
extern ubyte8 hash_kmer ( Kmer kmer );
extern int kmer2vt ( Kmer kmer );
-extern void print_kmer ( FILE * fp, Kmer kmer, char c );
-extern int bisearch ( VERTEX * vts, int num, Kmer target );
-extern void printKmerSeq ( FILE * fp, Kmer kmer );
+extern void print_kmer ( FILE *fp, Kmer kmer, char c );
+extern int bisearch ( VERTEX *vts, int num, Kmer target );
+extern void printKmerSeq ( FILE *fp, Kmer kmer );
extern char lastCharInKmer ( Kmer kmer );
-int localGraph ( READNEARBY * rdArray, int num, CTGinSCAF * ctg1, CTGinSCAF * ctg2,
- int origOverlap, Kmer * kmerCtg1, Kmer * kmerCtg2,
- int overlap, DARRAY * gapSeqArray, char * seqCtg1, char * seqCtg2, char * seqGap );
+int localGraph ( READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+ int origOverlap, Kmer *kmerCtg1, Kmer *kmerCtg2,
+ int overlap, DARRAY *gapSeqArray, char *seqCtg1, char *seqCtg2, char *seqGap );
extern unsigned int getTwinEdge ( unsigned int edgeno );
extern boolean EdSmallerThanTwin ( unsigned int edgeno );
extern boolean EdLargerThanTwin ( unsigned int edgeno );
extern boolean EdSameAsTwin ( unsigned int edgeno );
extern void removeLowCovEdges ( int lenCutoff, unsigned short covCutoff, boolean last );
extern int getMaxLongReadLen ( int num_libs );
-extern void prlLongRead2Ctg ( char * libfile, char * outfile );
-extern void outputTightStr ( FILE * fp, char * tightStr, int start, int length, int outputlen, int revS, int * col );
+extern void prlLongRead2Ctg ( char *libfile, char *outfile );
+extern void outputTightStr ( FILE *fp, char *tightStr, int start, int length, int outputlen, int revS, int *col );
extern void crc32c_Init();
-extern int validArcCount ( preARC * arc, int cutoff );
-extern unsigned int maxArcWeight ( preARC * arc );
+extern int validArcCount ( preARC *arc, int cutoff );
+extern unsigned int maxArcWeight ( preARC *arc );
extern __uint128_t Kmer2int128 ( Kmer seq );
-extern void printSeq ( FILE * fo, char * seq, int len );
+extern void printSeq ( FILE *fo, char *seq, int len );
diff --git a/standardPregraph/inc/extfunc2.h b/standardPregraph/inc/extfunc2.h
index 1d839a0..44d26d6 100644
--- a/standardPregraph/inc/extfunc2.h
+++ b/standardPregraph/inc/extfunc2.h
@@ -1,7 +1,7 @@
/*
* inc/extfunc2.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -22,8 +22,8 @@
#ifndef _MEM_MANAGER
#define _MEM_MANAGER
-extern MEM_MANAGER * createMem_manager ( int num_items, size_t unit_size );
-extern void * getItem ( MEM_MANAGER * mem_Manager );
-extern void returnItem ( MEM_MANAGER * mem_Manager, void * );
-extern void freeMem_manager ( MEM_MANAGER * mem_Manager );
+extern MEM_MANAGER *createMem_manager ( int num_items, size_t unit_size );
+extern void *getItem ( MEM_MANAGER *mem_Manager );
+extern void returnItem ( MEM_MANAGER *mem_Manager, void * );
+extern void freeMem_manager ( MEM_MANAGER *mem_Manager );
#endif
diff --git a/standardPregraph/inc/extvab.h b/standardPregraph/inc/extvab.h
index f7a5ce0..98d46e6 100644
--- a/standardPregraph/inc/extvab.h
+++ b/standardPregraph/inc/extvab.h
@@ -1,7 +1,7 @@
/*
* inc/extvab.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,52 +29,52 @@ extern Kmer WORDFILTER;
extern boolean globalFlag;
extern int thrd_num;
extern int gLineLen;
-extern char * gStr;
+extern char *gStr;
/**** reads info *****/
extern long long n_solexa;
extern long long prevNum;
extern int ins_size_var;
-extern PE_INFO * pes;
+extern PE_INFO *pes;
extern int maxReadLen;
extern int maxReadLen4all;
extern int minReadLen;
extern int maxNameLen;
extern int num_libs;
-extern LIB_INFO * lib_array;
+extern LIB_INFO *lib_array;
extern int libNo;
extern long long readNumBack;
extern int gradsCounter;
/*** used for pregraph *****/
-extern MEM_MANAGER * prearc_mem_manager; //also used in scaffolding
-extern MEM_MANAGER ** preArc_mem_managers;
+extern MEM_MANAGER *prearc_mem_manager; //also used in scaffolding
+extern MEM_MANAGER **preArc_mem_managers;
extern boolean deLowKmer;
extern boolean deLowEdge;
-extern KmerSet ** KmerSets; // also used in mapping
-extern KmerSet ** KmerSetsPatch;
+extern KmerSet **KmerSets; // also used in mapping
+extern KmerSet **KmerSetsPatch;
/**** used for contiging ****/
extern boolean repsTie;
extern long long arcCounter;
extern unsigned int num_ed;
extern unsigned int num_ed_limit;
extern unsigned int extraEdgeNum;
-extern EDGE * edge_array;
-extern VERTEX * vt_array;
-extern MEM_MANAGER * rv_mem_manager;
-extern MEM_MANAGER * arc_mem_manager;
+extern EDGE *edge_array;
+extern VERTEX *vt_array;
+extern MEM_MANAGER *rv_mem_manager;
+extern MEM_MANAGER *arc_mem_manager;
extern unsigned int num_vt;
extern unsigned long long new_num_vt;
extern int len_bar;
-extern ARC ** arcLookupTable;
-extern long long * markersArray;
+extern ARC **arcLookupTable;
+extern long long *markersArray;
/***** used for scaffolding *****/
-extern MEM_MANAGER * cn_mem_manager;
+extern MEM_MANAGER *cn_mem_manager;
extern unsigned int num_ctg;
-extern unsigned int * index_array;
-extern CONTIG * contig_array;
+extern unsigned int *index_array;
+extern CONTIG *contig_array;
extern int lineLen;
extern int weakPE;
extern long long newCntCounter;
-extern CONNECT ** cntLookupTable;
+extern CONNECT **cntLookupTable;
extern unsigned int ctg_short;
extern int cvgAvg;
extern boolean orig2new;
@@ -84,15 +84,15 @@ extern long long discardCntCounter;
extern int COMPATIBLE_MODE;
extern float cvg4SNP;
/**** used for gapFilling ****/
-extern DARRAY * readSeqInGap;
-extern DARRAY * gapSeqDarray;
-extern DARRAY ** darrayBuf;
+extern DARRAY *readSeqInGap;
+extern DARRAY *gapSeqDarray;
+extern DARRAY **darrayBuf;
extern int fillGap;
/**** used for searchPath *****/
extern int maxSteps;
extern int num_trace;
-extern unsigned int ** found_routes;
-extern unsigned int * so_far;
+extern unsigned int **found_routes;
+extern unsigned int *so_far;
extern int max_n_routes;
extern boolean maskRep;
extern int GLDiff;
@@ -102,7 +102,7 @@ extern int smallKmer;
extern int deltaKmer;
extern int gapNum;
extern int scaffNum;
-extern int * contig_index_array;
+extern int *contig_index_array;
extern double cvg_low;
extern double cvg_high;
@@ -113,10 +113,10 @@ extern int visual;
extern unsigned int num_vtnew;
extern unsigned int kmer_cnew;
extern const int step;
-extern unsigned int * edge_id;
-extern VERTEX * vt_arraynew;
-extern KmerSet2 * KmerSetsNew;
-extern MEM_MANAGER * edgeid_mem_manager;
+extern unsigned int *edge_id;
+extern VERTEX *vt_arraynew;
+extern KmerSet2 *KmerSetsNew;
+extern MEM_MANAGER *edgeid_mem_manager;
extern char libfilename[256];
extern boolean parse;
extern int nowstep2;
diff --git a/standardPregraph/inc/faidx.h b/standardPregraph/inc/faidx.h
index a36bf2c..f8bc0bc 100644
--- a/standardPregraph/inc/faidx.h
+++ b/standardPregraph/inc/faidx.h
@@ -50,58 +50,58 @@ typedef struct __faidx_t faidx_t;
extern "C" {
#endif
- /*!
- @abstract Build index for a FASTA or razip compressed FASTA file.
- @param fn FASTA file name
- @return 0 on success; or -1 on failure
- @discussion File "fn.fai" will be generated.
- */
- int fai_build ( const char * fn );
-
- /*!
- @abstract Distroy a faidx_t struct.
- @param fai Pointer to the struct to be destroyed
- */
- void fai_destroy ( faidx_t * fai );
-
- /*!
- @abstract Load index from "fn.fai".
- @param fn File name of the FASTA file
- */
- faidx_t * fai_load ( const char * fn );
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param reg Region in the format "chr2:20,000-30,000"
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char * fai_fetch ( const faidx_t * fai, const char * reg, int * len );
-
- /*!
- @abstract Fetch the number of sequences.
- @param fai Pointer to the faidx_t struct
- @return The number of sequences
- */
- int faidx_fetch_nseq ( const faidx_t * fai );
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param c_name Region name
- @param p_beg_i Beginning position number (zero-based)
- @param p_end_i End position number (zero-based)
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char * faidx_fetch_seq ( const faidx_t * fai, char * c_name, int p_beg_i, int p_end_i, int * len );
+/*!
+ @abstract Build index for a FASTA or razip compressed FASTA file.
+ @param fn FASTA file name
+ @return 0 on success; or -1 on failure
+ @discussion File "fn.fai" will be generated.
+ */
+int fai_build ( const char *fn );
+
+/*!
+ @abstract Distroy a faidx_t struct.
+ @param fai Pointer to the struct to be destroyed
+ */
+void fai_destroy ( faidx_t *fai );
+
+/*!
+ @abstract Load index from "fn.fai".
+ @param fn File name of the FASTA file
+ */
+faidx_t *fai_load ( const char *fn );
+
+/*!
+ @abstract Fetch the sequence in a region.
+ @param fai Pointer to the faidx_t struct
+ @param reg Region in the format "chr2:20,000-30,000"
+ @param len Length of the region
+ @return Pointer to the sequence; null on failure
+
+ @discussion The returned sequence is allocated by malloc family
+ and should be destroyed by end users by calling free() on it.
+ */
+char *fai_fetch ( const faidx_t *fai, const char *reg, int *len );
+
+/*!
+ @abstract Fetch the number of sequences.
+ @param fai Pointer to the faidx_t struct
+ @return The number of sequences
+ */
+int faidx_fetch_nseq ( const faidx_t *fai );
+
+/*!
+ @abstract Fetch the sequence in a region.
+ @param fai Pointer to the faidx_t struct
+ @param c_name Region name
+ @param p_beg_i Beginning position number (zero-based)
+ @param p_end_i End position number (zero-based)
+ @param len Length of the region
+ @return Pointer to the sequence; null on failure
+
+ @discussion The returned sequence is allocated by malloc family
+ and should be destroyed by end users by calling free() on it.
+ */
+char *faidx_fetch_seq ( const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len );
#ifdef __cplusplus
}
diff --git a/standardPregraph/inc/fib.h b/standardPregraph/inc/fib.h
index 4c05657..ccaabd6 100644
--- a/standardPregraph/inc/fib.h
+++ b/standardPregraph/inc/fib.h
@@ -43,17 +43,17 @@ typedef Coordinate ( *voidcmp ) ( unsigned int , unsigned int );
/* functions for key heaps */
boolean fh_isempty ( FibHeap * );
-FibHeap * fh_makekeyheap ( void );
-FibHeapNode * fh_insertkey ( FibHeap *, Coordinate, unsigned int );
+FibHeap *fh_makekeyheap ( void );
+FibHeapNode *fh_insertkey ( FibHeap *, Coordinate, unsigned int );
Coordinate fh_minkey ( FibHeap * );
Coordinate fh_replacekey ( FibHeap *, FibHeapNode *, Coordinate );
unsigned int fh_replacekeydata ( FibHeap *, FibHeapNode *, Coordinate, unsigned int );
/* functions for unsigned int * heaps */
-FibHeap * fh_makeheap ( void );
+FibHeap *fh_makeheap ( void );
voidcmp fh_setcmp ( FibHeap *, voidcmp );
unsigned int fh_setneginf ( FibHeap *, unsigned int );
-FibHeapNode * fh_insert ( FibHeap *, unsigned int );
+FibHeapNode *fh_insert ( FibHeap *, unsigned int );
/* shared functions */
unsigned int fh_extractmin ( FibHeap * );
@@ -61,6 +61,6 @@ unsigned int fh_min ( FibHeap * );
unsigned int fh_replacedata ( FibHeapNode *, unsigned int );
unsigned int fh_delete ( FibHeap *, FibHeapNode * );
void fh_deleteheap ( FibHeap * );
-FibHeap * fh_union ( FibHeap *, FibHeap * );
+FibHeap *fh_union ( FibHeap *, FibHeap * );
#endif /* _FIB_H_ */
diff --git a/standardPregraph/inc/fibHeap.h b/standardPregraph/inc/fibHeap.h
index 9a940b0..9ab5aa0 100644
--- a/standardPregraph/inc/fibHeap.h
+++ b/standardPregraph/inc/fibHeap.h
@@ -1,7 +1,7 @@
/*
* inc/fibHeap.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -23,23 +23,23 @@
#ifndef _FIBHEAP_H_
#define _FIBHEAP_H_
-FibHeap * newFibHeap();
+FibHeap *newFibHeap();
-FibHeapNode * insertNodeIntoHeap ( FibHeap * heap, Coordinate key,
- unsigned int node );
+FibHeapNode *insertNodeIntoHeap ( FibHeap *heap, Coordinate key,
+ unsigned int node );
-Coordinate minKeyOfHeap ( FibHeap * heap );
+Coordinate minKeyOfHeap ( FibHeap *heap );
-Coordinate replaceKeyInHeap ( FibHeap * heap, FibHeapNode * node,
+Coordinate replaceKeyInHeap ( FibHeap *heap, FibHeapNode *node,
Coordinate newKey );
-void replaceValueInHeap ( FibHeapNode * node, unsigned int newValue );
+void replaceValueInHeap ( FibHeapNode *node, unsigned int newValue );
-unsigned int removeNextNodeFromHeap ( FibHeap * heap );
+unsigned int removeNextNodeFromHeap ( FibHeap *heap );
-void * destroyNodeInHeap ( FibHeapNode * node, FibHeap * heap );
+void *destroyNodeInHeap ( FibHeapNode *node, FibHeap *heap );
-void destroyHeap ( FibHeap * heap );
+void destroyHeap ( FibHeap *heap );
-boolean IsHeapEmpty ( FibHeap * heap );
+boolean IsHeapEmpty ( FibHeap *heap );
#endif
diff --git a/standardPregraph/inc/fibpriv.h b/standardPregraph/inc/fibpriv.h
index af8529f..20e5ab3 100644
--- a/standardPregraph/inc/fibpriv.h
+++ b/standardPregraph/inc/fibpriv.h
@@ -44,52 +44,52 @@
*/
struct fibheap_el
{
- int fhe_degree;
- boolean fhe_mark;
- FibHeapNode * fhe_p;
- FibHeapNode * fhe_child;
- FibHeapNode * fhe_left;
- FibHeapNode * fhe_right;
- Coordinate fhe_key;
- unsigned int fhe_data;
+ int fhe_degree;
+ boolean fhe_mark;
+ FibHeapNode *fhe_p;
+ FibHeapNode *fhe_child;
+ FibHeapNode *fhe_left;
+ FibHeapNode *fhe_right;
+ Coordinate fhe_key;
+ unsigned int fhe_data;
};
-static FibHeapNode * fhe_newelem ( struct fibheap * );
+static FibHeapNode *fhe_newelem ( struct fibheap * );
static void fhe_initelem ( FibHeapNode * );
-static void fhe_insertafter ( FibHeapNode * a, FibHeapNode * b );
-static inline void fhe_insertbefore ( FibHeapNode * a, FibHeapNode * b );
-static FibHeapNode * fhe_remove ( FibHeapNode * a );
+static void fhe_insertafter ( FibHeapNode *a, FibHeapNode *b );
+static inline void fhe_insertbefore ( FibHeapNode *a, FibHeapNode *b );
+static FibHeapNode *fhe_remove ( FibHeapNode *a );
/*
* global heap operations
*/
struct fibheap
{
- Coordinate ( *fh_cmp_fnct ) ( unsigned int, unsigned int );
- MEM_MANAGER * nodeMemory;
- IDnum fh_n;
- IDnum fh_Dl;
- FibHeapNode ** fh_cons;
- FibHeapNode * fh_min;
- FibHeapNode * fh_root;
- unsigned int fh_neginf;
- boolean fh_keys: 1;
+ Coordinate ( *fh_cmp_fnct ) ( unsigned int, unsigned int );
+ MEM_MANAGER *nodeMemory;
+ IDnum fh_n;
+ IDnum fh_Dl;
+ FibHeapNode **fh_cons;
+ FibHeapNode *fh_min;
+ FibHeapNode *fh_root;
+ unsigned int fh_neginf;
+ boolean fh_keys: 1;
};
static void fh_initheap ( FibHeap * );
static void fh_insertrootlist ( FibHeap *, FibHeapNode * );
static void fh_removerootlist ( FibHeap *, FibHeapNode * );
static void fh_consolidate ( FibHeap * );
-static void fh_heaplink ( FibHeap * h, FibHeapNode * y, FibHeapNode * x );
+static void fh_heaplink ( FibHeap *h, FibHeapNode *y, FibHeapNode *x );
static void fh_cut ( FibHeap *, FibHeapNode *, FibHeapNode * );
static void fh_cascading_cut ( FibHeap *, FibHeapNode * );
-static FibHeapNode * fh_extractminel ( FibHeap * );
-static void fh_checkcons ( FibHeap * h );
-static void fh_destroyheap ( FibHeap * h );
-static int fh_compare ( FibHeap * h, FibHeapNode * a, FibHeapNode * b );
-static int fh_comparedata ( FibHeap * h, Coordinate key,
- unsigned int data, FibHeapNode * b );
-static void fh_insertel ( FibHeap * h, FibHeapNode * x );
+static FibHeapNode *fh_extractminel ( FibHeap * );
+static void fh_checkcons ( FibHeap *h );
+static void fh_destroyheap ( FibHeap *h );
+static int fh_compare ( FibHeap *h, FibHeapNode *a, FibHeapNode *b );
+static int fh_comparedata ( FibHeap *h, Coordinate key,
+ unsigned int data, FibHeapNode *b );
+static void fh_insertel ( FibHeap *h, FibHeapNode *x );
/*
* general functions
diff --git a/standardPregraph/inc/glf.h b/standardPregraph/inc/glf.h
index a6d2337..432a077 100644
--- a/standardPregraph/inc/glf.h
+++ b/standardPregraph/inc/glf.h
@@ -4,15 +4,15 @@
typedef struct
{
- unsigned char ref_base: 4, dummy: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
- unsigned char max_mapQ; /** maximum mapping quality */
- unsigned char lk[10]; /** log likelihood ratio, capped at 255 */
- unsigned min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
+ unsigned char ref_base: 4, dummy: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
+ unsigned char max_mapQ; /** maximum mapping quality */
+ unsigned char lk[10]; /** log likelihood ratio, capped at 255 */
+ unsigned min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
} glf1_t;
#include <stdint.h>
#include "bgzf.h"
-typedef BGZF * glfFile;
+typedef BGZF *glfFile;
#define GLF3_RTYPE_END 0
#define GLF3_RTYPE_SUB 1
@@ -20,21 +20,21 @@ typedef BGZF * glfFile;
typedef struct
{
- uint8_t ref_base: 4, rtype: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
- uint8_t rms_mapQ; /** RMS mapping quality */
- uint8_t lk[10]; /** log likelihood ratio, capped at 255 */
- uint32_t min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
- int32_t offset; /** the first base in a chromosome has offset zero. */
- // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10])
- int16_t indel_len[2];
- int32_t max_len; // maximum indel len; will be modified by glf3_read1()
- char * indel_seq[2];
+ uint8_t ref_base: 4, rtype: 4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */
+ uint8_t rms_mapQ; /** RMS mapping quality */
+ uint8_t lk[10]; /** log likelihood ratio, capped at 255 */
+ uint32_t min_lk: 8, depth: 24; /** minimum lk capped at 255, and the number of mapped reads */
+ int32_t offset; /** the first base in a chromosome has offset zero. */
+ // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10])
+ int16_t indel_len[2];
+ int32_t max_len; // maximum indel len; will be modified by glf3_read1()
+ char *indel_seq[2];
} glf3_t;
typedef struct
{
- int32_t l_text;
- uint8_t * text;
+ int32_t l_text;
+ uint8_t *text;
} glf3_header_t;
#ifdef __cplusplus
@@ -44,14 +44,14 @@ extern "C" {
#define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t)))
#define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0)
- glf3_header_t * glf3_header_init();
- glf3_header_t * glf3_header_read ( glfFile fp );
- void glf3_header_write ( glfFile fp, const glf3_header_t * h );
- void glf3_header_destroy ( glf3_header_t * h );
- char * glf3_ref_read ( glfFile fp, int * len );
- void glf3_ref_write ( glfFile fp, const char * name, int len );
- int glf3_write1 ( glfFile fp, const glf3_t * g3 );
- int glf3_read1 ( glfFile fp, glf3_t * g3 );
+glf3_header_t *glf3_header_init();
+glf3_header_t *glf3_header_read ( glfFile fp );
+void glf3_header_write ( glfFile fp, const glf3_header_t *h );
+void glf3_header_destroy ( glf3_header_t *h );
+char *glf3_ref_read ( glfFile fp, int *len );
+void glf3_ref_write ( glfFile fp, const char *name, int len );
+int glf3_write1 ( glfFile fp, const glf3_t *g3 );
+int glf3_read1 ( glfFile fp, glf3_t *g3 );
#ifdef __cplusplus
}
diff --git a/standardPregraph/inc/global.h b/standardPregraph/inc/global.h
index 7e0b7f2..8e46b9e 100644
--- a/standardPregraph/inc/global.h
+++ b/standardPregraph/inc/global.h
@@ -1,7 +1,7 @@
/*
* inc/global.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -21,7 +21,7 @@
*/
int visual = 0; // 1 for output some files , which are useful for visual
-int * contig_index_array = NULL;
+int *contig_index_array = NULL;
int scaffNum = 0;
int gapNum = 1;
boolean fill = 0; // 1 for output some files ,which are useful for the software "kgf"
@@ -31,14 +31,14 @@ long long n_ban; //not used
long long n_solexa = 0; //reads number
long long prevNum = 0; //not used
int ins_size_var = 20; // SD of insert-size
-PE_INFO * pes = NULL; //record the pe info in lib file
-MEM_MANAGER * rv_mem_manager = NULL;
-MEM_MANAGER * cn_mem_manager = NULL;
-MEM_MANAGER * arc_mem_manager = NULL;
+PE_INFO *pes = NULL; //record the pe info in lib file
+MEM_MANAGER *rv_mem_manager = NULL;
+MEM_MANAGER *cn_mem_manager = NULL;
+MEM_MANAGER *arc_mem_manager = NULL;
unsigned int num_vt = 0; // num of the end-kmer
unsigned long long new_num_vt = 0; // the new num of the end-kmer after adding the new end-kmer
-unsigned int ** found_routes = NULL;
-unsigned int * so_far = NULL; // recorf the path of contig while filling gap
+unsigned int **found_routes = NULL;
+unsigned int *so_far = NULL; // recorf the path of contig while filling gap
int max_n_routes = 10;
int num_trace;
Kmer WORDFILTER; //mask code for extracting Kmer info from raw data (two unsigned long long int)
@@ -46,43 +46,43 @@ unsigned int num_ed = 0; //number of edges
unsigned int num_ctg = 0; // num of contig
unsigned int num_ed_limit; // the count of edge
unsigned int extraEdgeNum; // the new count of edge after adding the new edge
-EDGE * edge_array = NULL; // used to record all the info of edge
-VERTEX * vt_array = NULL; // used to record the sequence info of the end-kmer
-unsigned int * index_array = NULL; // used to translate the old contig index to the new contig index
-CONTIG * contig_array = NULL; // used to record all the info of contig
+EDGE *edge_array = NULL; // used to record all the info of edge
+VERTEX *vt_array = NULL; // used to record the sequence info of the end-kmer
+unsigned int *index_array = NULL; // used to translate the old contig index to the new contig index
+CONTIG *contig_array = NULL; // used to record all the info of contig
int lineLen;
int len_bar = 100;
int weakPE = 3; // the minimun weight requirement for the connection
int fillGap = 0; // 1 for fill the gap after scaffold asm
boolean globalFlag;
long long arcCounter; // record the num of the arc
-MEM_MANAGER * prearc_mem_manager = NULL;
-MEM_MANAGER ** preArc_mem_managers = NULL;
+MEM_MANAGER *prearc_mem_manager = NULL;
+MEM_MANAGER **preArc_mem_managers = NULL;
int maxReadLen = 0; //max length will be used for each LIB, soapdenovo read LIBs one by one , for each set a maxReadLen
int maxReadLen4all = 0; //max length will be used for all reads
int minReadLen = 0; // min length will be used for all readss
int maxNameLen = 0; //max length for the name of reads or sequences
-ARC ** arcLookupTable = NULL;
-long long * markersArray = NULL;
+ARC **arcLookupTable = NULL;
+long long *markersArray = NULL;
boolean deLowKmer = 0; //remove the kmers which coverage are not bigger than deLowKmer
boolean deLowEdge = 1; //remove the edges which coverage are not bigger than deLowEdge
long long newCntCounter; // record the number of the new connection in one insert-size
long long discardCntCounter;
boolean repsTie = 0; //sovle tiny repeat or not
-CONNECT ** cntLookupTable = NULL;
+CONNECT **cntLookupTable = NULL;
int num_libs = 0; //number of LIBs in read config file
-LIB_INFO * lib_array = NULL; //store LIB's info into lib_array
+LIB_INFO *lib_array = NULL; //store LIB's info into lib_array
int libNo = 0; // the current number of lib
long long readNumBack;
int gradsCounter; //pair number in lib file
unsigned int ctg_short = 0; //shortest contig for scaffolding
int thrd_num = 8; //thread number
int cvgAvg = 0; // the average coverage of contigs
-KmerSet ** KmerSets = NULL; //KmerSet [i] for thread i
-KmerSet ** KmerSetsPatch = NULL; //KmerSet for (k+1) mer
-DARRAY * readSeqInGap = NULL;
-DARRAY * gapSeqDarray = NULL;
-DARRAY ** darrayBuf;
+KmerSet **KmerSets = NULL; //KmerSet [i] for thread i
+KmerSet **KmerSetsPatch = NULL; //KmerSet for (k+1) mer
+DARRAY *readSeqInGap = NULL;
+DARRAY *gapSeqDarray = NULL;
+DARRAY **darrayBuf;
boolean orig2new; // 1 for re-arrange the contig index using the length
int maxSteps;
boolean maskRep = 1; // 1 for masking repeat for scaffold asm , 0 for un-masking repeat.
@@ -102,17 +102,17 @@ int score_mask = 1;
int COMPATIBLE_MODE = 0; // 1 for the gz file ; 0 for the normal file
float cvg4SNP = 0.6;
-MEM_MANAGER * edgeid_mem_manager = NULL;
+MEM_MANAGER *edgeid_mem_manager = NULL;
unsigned int num_vtnew = 0; //new vertex num
unsigned int kmer_cnew = 0; //new kmer num
const int step = 1; //step for multi kmer
//int nowstep = 1;
int nowstep2 = 1;
-unsigned int * edge_id = NULL; //edge id array
-VERTEX * vt_arraynew = NULL; //vertex array for k+1mer
+unsigned int *edge_id = NULL; //edge id array
+VERTEX *vt_arraynew = NULL; //vertex array for k+1mer
-KmerSet2 * KmerSetsNew = NULL; //kmer set for k+1mer
+KmerSet2 *KmerSetsNew = NULL; //kmer set for k+1mer
char libfilename[256];
boolean parse = 0;
unsigned int num_ed_temp = 0; // record the count of the edge
@@ -126,4 +126,4 @@ int clean; //merge clean bubble
unsigned int num_kmer_limit;
int gLineLen = 5000;
-char * gStr = NULL;
+char *gStr = NULL;
diff --git a/standardPregraph/inc/kmerhash.h b/standardPregraph/inc/kmerhash.h
index 7421119..d04926e 100644
--- a/standardPregraph/inc/kmerhash.h
+++ b/standardPregraph/inc/kmerhash.h
@@ -1,7 +1,7 @@
/*
* inc/kmerhash.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,39 +27,39 @@
typedef struct edgeID
{
- unsigned int edge; //edge id
- char flag; //00: big to, 01: big from, 10: small to, 11: small from
- struct edgeID * next;
+ unsigned int edge; //edge id
+ char flag; //00: big to, 01: big from, 10: small to, 11: small from
+ struct edgeID *next;
} EDGEID;
typedef struct kmer_st2
{
- Kmer seq; //kmer sequence
- ubyte4 l_links; //left out degree
- ubyte4 r_links; //right out degree
- int count; //edge number
- struct edgeID * edgeId;
+ Kmer seq; //kmer sequence
+ ubyte4 l_links; //left out degree
+ ubyte4 r_links; //right out degree
+ int count; //edge number
+ struct edgeID *edgeId;
} kmer_t2;
typedef struct kmerSet_st2
{
- kmer_t2 * array; //kmer set
- ubyte4 * flags; //mark the element pos that exist in array
- ubyte8 size;
- ubyte8 count;
- ubyte8 max;
- double load_factor;
- ubyte8 iter_ptr;
+ kmer_t2 *array; //kmer set
+ ubyte4 *flags; //mark the element pos that exist in array
+ ubyte8 size;
+ ubyte8 count;
+ ubyte8 max;
+ double load_factor;
+ ubyte8 iter_ptr;
} KmerSet2;
-extern KmerSet2 * init_kmerset2 ( ubyte8 init_size, float load_factor );
-extern int search_kmerset2 ( KmerSet2 * set, Kmer seq, kmer_t2 ** rs );
-extern int put_kmerset2 ( KmerSet2 * set, Kmer seq, int id, char flag, kmer_t2 ** kmer_p );
-extern byte8 count_kmerset2 ( KmerSet2 * set );
-extern void free_Sets2 ( KmerSet2 ** KmerSets, int num );
-extern void free_kmerset2 ( KmerSet2 * set );
-extern void update_kmer2 ( kmer_t2 * mer, int id, char flag );
-extern void set_new_kmer2 ( kmer_t2 * mer, Kmer seq, int id, char flag );
+extern KmerSet2 *init_kmerset2 ( ubyte8 init_size, float load_factor );
+extern int search_kmerset2 ( KmerSet2 *set, Kmer seq, kmer_t2 **rs );
+extern int put_kmerset2 ( KmerSet2 *set, Kmer seq, int id, char flag, kmer_t2 **kmer_p );
+extern byte8 count_kmerset2 ( KmerSet2 *set );
+extern void free_Sets2 ( KmerSet2 **KmerSets, int num );
+extern void free_kmerset2 ( KmerSet2 *set );
+extern void update_kmer2 ( kmer_t2 *mer, int id, char flag );
+extern void set_new_kmer2 ( kmer_t2 *mer, Kmer seq, int id, char flag );
#endif
diff --git a/standardPregraph/inc/knetfile.h b/standardPregraph/inc/knetfile.h
index ef70527..4b021d4 100644
--- a/standardPregraph/inc/knetfile.h
+++ b/standardPregraph/inc/knetfile.h
@@ -23,18 +23,18 @@
typedef struct knetFile_s
{
- int type, fd;
- int64_t offset;
- char * host, *port;
-
- // the following are for FTP only
- int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
- char * response, *retr, *size_cmd;
- int64_t seek_offset; // for lazy seek
- int64_t file_size;
-
- // the following are for HTTP only
- char * path, *http_host;
+ int type, fd;
+ int64_t offset;
+ char *host, *port;
+
+ // the following are for FTP only
+ int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
+ char *response, *retr, *size_cmd;
+ int64_t seek_offset; // for lazy seek
+ int64_t file_size;
+
+ // the following are for HTTP only
+ char *path, *http_host;
} knetFile;
#define knet_tell(fp) ((fp)->offset)
@@ -45,29 +45,29 @@ extern "C" {
#endif
#ifdef _WIN32
- int knet_win32_init();
- void knet_win32_destroy();
+int knet_win32_init();
+void knet_win32_destroy();
#endif
- knetFile * knet_open ( const char * fn, const char * mode );
-
- /*
- This only works with local files.
- */
- knetFile * knet_dopen ( int fd, const char * mode );
-
- /*
- If ->is_ready==0, this routine updates ->fd; otherwise, it simply
- reads from ->fd.
- */
- off_t knet_read ( knetFile * fp, void * buf, off_t len );
-
- /*
- This routine only sets ->offset and ->is_ready=0. It does not
- communicate with the FTP server.
- */
- off_t knet_seek ( knetFile * fp, int64_t off, int whence );
- int knet_close ( knetFile * fp );
+knetFile *knet_open ( const char *fn, const char *mode );
+
+/*
+ This only works with local files.
+ */
+knetFile *knet_dopen ( int fd, const char *mode );
+
+/*
+ If ->is_ready==0, this routine updates ->fd; otherwise, it simply
+ reads from ->fd.
+ */
+off_t knet_read ( knetFile *fp, void *buf, off_t len );
+
+/*
+ This routine only sets ->offset and ->is_ready=0. It does not
+ communicate with the FTP server.
+ */
+off_t knet_seek ( knetFile *fp, int64_t off, int whence );
+int knet_close ( knetFile *fp );
#ifdef __cplusplus
}
diff --git a/standardPregraph/inc/kstring.h b/standardPregraph/inc/kstring.h
index 22dbd69..392d06c 100644
--- a/standardPregraph/inc/kstring.h
+++ b/standardPregraph/inc/kstring.h
@@ -13,61 +13,61 @@
#define KSTRING_T kstring_t
typedef struct __kstring_t
{
- size_t l, m;
- char * s;
+ size_t l, m;
+ char *s;
} kstring_t;
#endif
-int ksprintf ( kstring_t * s, const char * fmt, ... );
-int ksplit_core ( char * s, int delimiter, int * _max, int ** _offsets );
+int ksprintf ( kstring_t *s, const char *fmt, ... );
+int ksplit_core ( char *s, int delimiter, int *_max, int **_offsets );
// calculate the auxiliary array, allocated by calloc()
-int * ksBM_prep ( const uint8_t * pat, int m );
+int *ksBM_prep ( const uint8_t *pat, int m );
/* Search pat in str and returned the list of matches. The size of the
* list is returned as n_matches. _prep is the array returned by
* ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
-int * ksBM_search ( const uint8_t * str, int n, const uint8_t * pat, int m, int * _prep, int * n_matches );
+int *ksBM_search ( const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches );
-static inline int kputsn ( const char * p, int l, kstring_t * s )
+static inline int kputsn ( const char *p, int l, kstring_t *s )
{
- if ( s->l + l + 1 >= s->m )
- {
- s->m = s->l + l + 2;
- kroundup32 ( s->m );
- s->s = ( char * ) realloc ( s->s, s->m );
- }
+ if ( s->l + l + 1 >= s->m )
+ {
+ s->m = s->l + l + 2;
+ kroundup32 ( s->m );
+ s->s = ( char * ) realloc ( s->s, s->m );
+ }
- strncpy ( s->s + s->l, p, l );
- s->l += l;
- s->s[s->l] = 0;
- return l;
+ strncpy ( s->s + s->l, p, l );
+ s->l += l;
+ s->s[s->l] = 0;
+ return l;
}
-static inline int kputs ( const char * p, kstring_t * s )
+static inline int kputs ( const char *p, kstring_t *s )
{
- return kputsn ( p, strlen ( p ), s );
+ return kputsn ( p, strlen ( p ), s );
}
-static inline int kputc ( int c, kstring_t * s )
+static inline int kputc ( int c, kstring_t *s )
{
- if ( s->l + 1 >= s->m )
- {
- s->m = s->l + 2;
- kroundup32 ( s->m );
- s->s = ( char * ) realloc ( s->s, s->m );
- }
+ if ( s->l + 1 >= s->m )
+ {
+ s->m = s->l + 2;
+ kroundup32 ( s->m );
+ s->s = ( char * ) realloc ( s->s, s->m );
+ }
- s->s[s->l++] = c;
- s->s[s->l] = 0;
- return c;
+ s->s[s->l++] = c;
+ s->s[s->l] = 0;
+ return c;
}
-static inline int * ksplit ( kstring_t * s, int delimiter, int * n )
+static inline int *ksplit ( kstring_t *s, int delimiter, int *n )
{
- int max = 0, *offsets = 0;
- *n = ksplit_core ( s->s, delimiter, &max, &offsets );
- return offsets;
+ int max = 0, *offsets = 0;
+ *n = ksplit_core ( s->s, delimiter, &max, &offsets );
+ return offsets;
}
#endif
diff --git a/standardPregraph/inc/newhash.h b/standardPregraph/inc/newhash.h
index 8b5d17b..ff58efc 100644
--- a/standardPregraph/inc/newhash.h
+++ b/standardPregraph/inc/newhash.h
@@ -1,7 +1,7 @@
/*
* inc/newhash.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -57,55 +57,55 @@ typedef __uint128_t u128b;
typedef struct u256b
{
- u128b low;
- u128b high;
+ u128b low;
+ u128b high;
} U256b;
#else
#endif
typedef struct kmer_st
{
- Kmer seq; //kmer set
- ubyte4 l_links; // sever as edgeID since make_edge
- ubyte4 r_links: 4 * EDGE_BIT_SIZE;
- ubyte4 linear: 1;
- ubyte4 deleted: 1;
- ubyte4 checked: 1;
- ubyte4 single: 1;
- ubyte4 twin: 2;
- ubyte4 inEdge: 2;
+ Kmer seq; //kmer set
+ ubyte4 l_links; // serves as edgeID since make_edge
+ ubyte4 r_links: 4 * EDGE_BIT_SIZE;
+ ubyte4 linear: 1;
+ ubyte4 deleted: 1;
+ ubyte4 checked: 1;
+ ubyte4 single: 1;
+ ubyte4 twin: 2;
+ ubyte4 inEdge: 2;
} kmer_t;
typedef struct kmerSet_st
{
- kmer_t * array; //kmer set
- ubyte4 * flags; //mark the element pos that exist in array
- ubyte8 size;
- ubyte8 count;
- ubyte8 max;
- float load_factor;
- ubyte8 iter_ptr; //iter in set
+ kmer_t *array; //kmer set
+ ubyte4 *flags; //mark the element pos that exist in array
+ ubyte8 size;
+ ubyte8 count;
+ ubyte8 max;
+ float load_factor;
+ ubyte8 iter_ptr; //iter in set
} KmerSet;
typedef struct kmer_pt
{
- kmer_t * node;
- Kmer kmer;
- boolean isSmaller;
- struct kmer_pt * next;
+ kmer_t *node;
+ Kmer kmer;
+ boolean isSmaller;
+ struct kmer_pt *next;
} KMER_PT;
-extern KmerSet * init_kmerset ( ubyte8 init_size, float load_factor );
-extern int search_kmerset ( KmerSet * set, Kmer seq, kmer_t ** rs );
-extern int put_kmerset ( KmerSet * set, Kmer seq, ubyte left, ubyte right, kmer_t ** kmer_p );
-extern byte8 count_kmerset ( KmerSet * set );
-extern void free_Sets ( KmerSet ** KmerSets, int num );
-extern void free_kmerset ( KmerSet * set );
-extern void dislink2nextUncertain ( kmer_t * node, char ch, boolean smaller );
-extern void dislink2prevUncertain ( kmer_t * node, char ch, boolean smaller );
-
-extern int count_branch2prev ( kmer_t * node );
-extern int count_branch2next ( kmer_t * node );
+extern KmerSet *init_kmerset ( ubyte8 init_size, float load_factor );
+extern int search_kmerset ( KmerSet *set, Kmer seq, kmer_t **rs );
+extern int put_kmerset ( KmerSet *set, Kmer seq, ubyte left, ubyte right, kmer_t **kmer_p );
+extern byte8 count_kmerset ( KmerSet *set );
+extern void free_Sets ( KmerSet **KmerSets, int num );
+extern void free_kmerset ( KmerSet *set );
+extern void dislink2nextUncertain ( kmer_t *node, char ch, boolean smaller );
+extern void dislink2prevUncertain ( kmer_t *node, char ch, boolean smaller );
+
+extern int count_branch2prev ( kmer_t *node );
+extern int count_branch2next ( kmer_t *node );
extern char firstCharInKmer ( Kmer kmer );
#endif
diff --git a/standardPregraph/inc/nuc.h b/standardPregraph/inc/nuc.h
index 7762cf8..d0044b4 100644
--- a/standardPregraph/inc/nuc.h
+++ b/standardPregraph/inc/nuc.h
@@ -1,7 +1,7 @@
/*
* inc/nuc.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
diff --git a/standardPregraph/inc/razf.h b/standardPregraph/inc/razf.h
index 425257e..dcbdf9e 100644
--- a/standardPregraph/inc/razf.h
+++ b/standardPregraph/inc/razf.h
@@ -66,10 +66,10 @@ typedef struct _gz_header_s _gz_header;
typedef struct
{
- uint32_t * cell_offsets; // i
- int64_t * bin_offsets; // i / BIN_SIZE
- int size;
- int cap;
+ uint32_t *cell_offsets; // i
+ int64_t *bin_offsets; // i / BIN_SIZE
+ int size;
+ int cap;
} ZBlockIndex;
/* When storing index, output bytes in Big-Endian everywhere */
@@ -79,56 +79,56 @@ typedef struct
typedef struct RandomAccessZFile
{
- char mode; /* 'w' : write mode; 'r' : read mode */
- int file_type;
- /* plain file or rz file, razf_read support plain file as input too, in this case, razf_read work as buffered fread */
+ char mode; /* 'w' : write mode; 'r' : read mode */
+ int file_type;
+ /* plain file or rz file; razf_read supports plain files as input too, in which case razf_read works as a buffered fread */
#ifdef _USE_KNETFILE
- union
- {
- knetFile * fpr;
- int fpw;
- } x;
+ union
+ {
+ knetFile *fpr;
+ int fpw;
+ } x;
#else
- int filedes; /* the file descriptor */
+ int filedes; /* the file descriptor */
#endif
- z_stream * stream;
- ZBlockIndex * index;
- int64_t in, out, end, src_end;
- /* in: n bytes total in; out: n bytes total out; */
- /* end: the end of all data blocks, while the start of index; src_end: the true end position in uncompressed file */
- int buf_flush; // buffer should be flush, suspend inflate util buffer is empty
- int64_t block_pos, block_off, next_block_pos;
- /* block_pos: the start postiion of current block in compressed file */
- /* block_off: tell how many bytes have been read from current block */
- void * inbuf, *outbuf;
- int header_size;
- gz_header * header;
- /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */
- int buf_off, buf_len;
- int z_err, z_eof;
- int seekable;
- /* Indice where the source is seekable */
- int load_index;
- /* set has_index to 0 in mode 'w', then index will be discarded */
+ z_stream *stream;
+ ZBlockIndex *index;
+ int64_t in, out, end, src_end;
+ /* in: n bytes total in; out: n bytes total out; */
+ /* end: the end of all data blocks, which is also the start of the index; src_end: the true end position in uncompressed file */
+ int buf_flush; // buffer should be flushed; suspend inflate until buffer is empty
+ int64_t block_pos, block_off, next_block_pos;
+ /* block_pos: the start position of current block in compressed file */
+ /* block_off: tell how many bytes have been read from current block */
+ void *inbuf, *outbuf;
+ int header_size;
+ gz_header *header;
+ /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */
+ int buf_off, buf_len;
+ int z_err, z_eof;
+ int seekable;
+ /* Indicates whether the source is seekable */
+ int load_index;
+ /* set has_index to 0 in mode 'w', then index will be discarded */
} RAZF;
#ifdef __cplusplus
extern "C" {
#endif
- RAZF * razf_dopen ( int data_fd, const char * mode );
- RAZF * razf_open ( const char * fn, const char * mode );
- int razf_write ( RAZF * rz, const void * data, int size );
- int razf_read ( RAZF * rz, void * data, int size );
- int64_t razf_seek ( RAZF * rz, int64_t pos, int where );
- void razf_close ( RAZF * rz );
+RAZF *razf_dopen ( int data_fd, const char *mode );
+RAZF *razf_open ( const char *fn, const char *mode );
+int razf_write ( RAZF *rz, const void *data, int size );
+int razf_read ( RAZF *rz, void *data, int size );
+int64_t razf_seek ( RAZF *rz, int64_t pos, int where );
+void razf_close ( RAZF *rz );
#define razf_tell(rz) ((rz)->out)
- RAZF * razf_open2 ( const char * filename, const char * mode );
- RAZF * razf_dopen2 ( int fd, const char * mode );
- uint64_t razf_tell2 ( RAZF * rz );
- int64_t razf_seek2 ( RAZF * rz, uint64_t voffset, int where );
+RAZF *razf_open2 ( const char *filename, const char *mode );
+RAZF *razf_dopen2 ( int fd, const char *mode );
+uint64_t razf_tell2 ( RAZF *rz );
+int64_t razf_seek2 ( RAZF *rz, uint64_t voffset, int where );
#ifdef __cplusplus
}
diff --git a/standardPregraph/inc/sam_header.h b/standardPregraph/inc/sam_header.h
index c1d3ad9..f8a1e50 100644
--- a/standardPregraph/inc/sam_header.h
+++ b/standardPregraph/inc/sam_header.h
@@ -5,17 +5,17 @@
extern "C" {
#endif
- void * sam_header_parse2 ( const char * headerText );
- void * sam_header_merge ( int n, const void ** dicts );
- void sam_header_free ( void * header );
- char * sam_header_write ( const void * headerDict ); // returns a newly allocated string
+void *sam_header_parse2 ( const char *headerText );
+void *sam_header_merge ( int n, const void **dicts );
+void sam_header_free ( void *header );
+char *sam_header_write ( const void *headerDict ); // returns a newly allocated string
- char ** sam_header2list ( const void * _dict, char type[2], char key_tag[2], int * _n );
+char **sam_header2list ( const void *_dict, char type[2], char key_tag[2], int *_n );
- void * sam_header2tbl ( const void * dict, char type[2], char key_tag[2], char value_tag[2] );
- const char * sam_tbl_get ( void * h, const char * key );
- int sam_tbl_size ( void * h );
- void sam_tbl_destroy ( void * h );
+void *sam_header2tbl ( const void *dict, char type[2], char key_tag[2], char value_tag[2] );
+const char *sam_tbl_get ( void *h, const char *key );
+int sam_tbl_size ( void *h );
+void sam_tbl_destroy ( void *h );
#ifdef __cplusplus
}
diff --git a/standardPregraph/inc/sam_view.h b/standardPregraph/inc/sam_view.h
index 0663765..33d826f 100644
--- a/standardPregraph/inc/sam_view.h
+++ b/standardPregraph/inc/sam_view.h
@@ -3,53 +3,64 @@
static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
-static char * g_library, *g_rg;
+static char *g_library, *g_rg;
static int g_sol2sanger_tbl[128];
-static void sol2sanger ( bam1_t * b )
+static void sol2sanger ( bam1_t *b )
{
- int l;
- uint8_t * qual = bam1_qual ( b );
+ int l;
+ uint8_t *qual = bam1_qual ( b );
- if ( g_sol2sanger_tbl[30] == 0 )
- {
- for ( l = 0; l != 128; ++l )
- {
- g_sol2sanger_tbl[l] = ( int ) ( 10.0 * log ( 1.0 + pow ( 10.0, ( l - 64 + 33 ) / 10.0 ) ) / log ( 10.0 ) + .499 );
+ if ( g_sol2sanger_tbl[30] == 0 )
+ {
+ for ( l = 0; l != 128; ++l )
+ {
+ g_sol2sanger_tbl[l] = ( int ) ( 10.0 * log ( 1.0 + pow ( 10.0, ( l - 64 + 33 ) / 10.0 ) ) / log ( 10.0 ) + .499 );
- if ( g_sol2sanger_tbl[l] >= 93 ) { g_sol2sanger_tbl[l] = 93; }
- }
- }
+ if ( g_sol2sanger_tbl[l] >= 93 )
+ {
+ g_sol2sanger_tbl[l] = 93;
+ }
+ }
+ }
- for ( l = 0; l < b->core.l_qseq; ++l )
- {
- int q = qual[l];
+ for ( l = 0; l < b->core.l_qseq; ++l )
+ {
+ int q = qual[l];
- if ( q > 127 ) { q = 127; }
+ if ( q > 127 )
+ {
+ q = 127;
+ }
- qual[l] = g_sol2sanger_tbl[q];
- }
+ qual[l] = g_sol2sanger_tbl[q];
+ }
}
-static inline int __g_skip_aln ( const bam_header_t * h, const bam1_t * b )
+static inline int __g_skip_aln ( const bam_header_t *h, const bam1_t *b )
{
- if ( b->core.qual < g_min_mapQ || ( ( b->core.flag & g_flag_on ) != g_flag_on ) || ( b->core.flag & g_flag_off ) )
- { return 1; }
+ if ( b->core.qual < g_min_mapQ || ( ( b->core.flag & g_flag_on ) != g_flag_on ) || ( b->core.flag & g_flag_off ) )
+ {
+ return 1;
+ }
- if ( g_rg )
- {
- uint8_t * s = bam_aux_get ( b, "RG" );
+ if ( g_rg )
+ {
+ uint8_t *s = bam_aux_get ( b, "RG" );
- if ( s && strcmp ( g_rg, ( char * ) ( s + 1 ) ) == 0 ) { return 0; }
- }
+ if ( s && strcmp ( g_rg, ( char * ) ( s + 1 ) ) == 0 )
+ {
+ return 0;
+ }
+ }
- if ( g_library )
- {
- const char * p = bam_get_library ( ( bam_header_t * ) h, b );
- return ( p && strcmp ( p, g_library ) == 0 ) ? 0 : 1;
- }
+ if ( g_library )
+ {
+ const char *p = bam_get_library ( ( bam_header_t * ) h, b );
+ return ( p && strcmp ( p, g_library ) == 0 ) ? 0 : 1;
+ }
- return 0;
+ return 0;
}
diff --git a/standardPregraph/inc/stack.h b/standardPregraph/inc/stack.h
index 4973260..81ca8b3 100644
--- a/standardPregraph/inc/stack.h
+++ b/standardPregraph/inc/stack.h
@@ -1,7 +1,7 @@
/*
* inc/stack.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -29,29 +29,29 @@
typedef struct block_starter
{
- struct block_starter * prev;
- struct block_starter * next;
+ struct block_starter *prev;
+ struct block_starter *next;
} BLOCK_STARTER;
typedef struct stack
{
- BLOCK_STARTER * block_list;
- int index_in_block;
- int items_per_block;
- int item_c;
- size_t item_size;
- BLOCK_STARTER * block_backup;
- int index_backup;
- int item_c_backup;
+ BLOCK_STARTER *block_list;
+ int index_in_block;
+ int items_per_block;
+ int item_c;
+ size_t item_size;
+ BLOCK_STARTER *block_backup;
+ int index_backup;
+ int item_c_backup;
} STACK;
-void stackBackup ( STACK * astack );
-void stackRecover ( STACK * astack );
-void * stackPush ( STACK * astack );
-void * stackPop ( STACK * astack );
-void freeStack ( STACK * astack );
-void emptyStack ( STACK * astack );
-STACK * createStack ( int num_items, size_t unit_size );
+void stackBackup ( STACK *astack );
+void stackRecover ( STACK *astack );
+void *stackPush ( STACK *astack );
+void *stackPop ( STACK *astack );
+void freeStack ( STACK *astack );
+void emptyStack ( STACK *astack );
+STACK *createStack ( int num_items, size_t unit_size );
#endif
diff --git a/standardPregraph/inc/stdinc.h b/standardPregraph/inc/stdinc.h
index dbef7c3..67ca028 100644
--- a/standardPregraph/inc/stdinc.h
+++ b/standardPregraph/inc/stdinc.h
@@ -1,7 +1,7 @@
/*
* inc/stdinc.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
diff --git a/standardPregraph/inc/types.h b/standardPregraph/inc/types.h
index f3f821d..dc4dc59 100644
--- a/standardPregraph/inc/types.h
+++ b/standardPregraph/inc/types.h
@@ -1,7 +1,7 @@
/*
* inc/types.h
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
diff --git a/standardPregraph/inc/xcurses.h b/standardPregraph/inc/xcurses.h
index 06ed84f..24553b7 100644
--- a/standardPregraph/inc/xcurses.h
+++ b/standardPregraph/inc/xcurses.h
@@ -54,11 +54,11 @@ extern "C"
# define bool _bool
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Manifest Constants
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Manifest Constants
+ *
+ */
#ifndef FALSE
# define FALSE 0
@@ -76,43 +76,43 @@ extern "C"
# define OK 0
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Type Declarations
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Type Declarations
+ *
+ */
- //typedef unsigned char bool; /* PDCurses Boolean type */
+//typedef unsigned char bool; /* PDCurses Boolean type */
#ifdef CHTYPE_LONG
# if _LP64
- typedef unsigned int chtype;
+typedef unsigned int chtype;
# else
- typedef unsigned long chtype; /* 16-bit attr + 16-bit char */
+typedef unsigned long chtype; /* 16-bit attr + 16-bit char */
# endif
#else
- typedef unsigned short chtype; /* 8-bit attr + 8-bit char */
+typedef unsigned short chtype; /* 8-bit attr + 8-bit char */
#endif
#ifdef PDC_WIDE
- typedef chtype cchar_t;
+typedef chtype cchar_t;
#endif
- typedef chtype attr_t;
+typedef chtype attr_t;
- /*----------------------------------------------------------------------
- *
- * PDCurses Mouse Interface -- SYSVR4, with extensions
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Mouse Interface -- SYSVR4, with extensions
+ *
+ */
- typedef struct
- {
- int x; /* absolute column, 0 based, measured in characters */
- int y; /* absolute row, 0 based, measured in characters */
- short button[3]; /* state of each button */
- int changes; /* flags indicating what has changed with the mouse */
- } MOUSE_STATUS;
+typedef struct
+{
+ int x; /* absolute column, 0 based, measured in characters */
+ int y; /* absolute row, 0 based, measured in characters */
+ short button[3]; /* state of each button */
+ int changes; /* flags indicating what has changed with the mouse */
+} MOUSE_STATUS;
#define BUTTON_RELEASED 0x0000
#define BUTTON_PRESSED 0x0001
@@ -131,18 +131,18 @@ extern "C"
#define MOUSE_X_POS (Mouse_status.x)
#define MOUSE_Y_POS (Mouse_status.y)
- /*
- * Bits associated with the .changes field:
- * 3 2 1 0
- * 210987654321098765432109876543210
- * 1 <- button 1 has changed
- * 10 <- button 2 has changed
- * 100 <- button 3 has changed
- * 1000 <- mouse has moved
- * 10000 <- mouse position report
- * 100000 <- mouse wheel up
- * 1000000 <- mouse wheel down
- */
+/*
+ * Bits associated with the .changes field:
+ * 3 2 1 0
+ * 210987654321098765432109876543210
+ * 1 <- button 1 has changed
+ * 10 <- button 2 has changed
+ * 100 <- button 3 has changed
+ * 1000 <- mouse has moved
+ * 10000 <- mouse position report
+ * 100000 <- mouse wheel up
+ * 1000000 <- mouse wheel down
+ */
#define PDC_MOUSE_MOVED 0x0008
#define PDC_MOUSE_POSITION 0x0010
@@ -157,7 +157,7 @@ extern "C"
#define MOUSE_WHEEL_UP (Mouse_status.changes & PDC_MOUSE_WHEEL_UP)
#define MOUSE_WHEEL_DOWN (Mouse_status.changes & PDC_MOUSE_WHEEL_DOWN)
- /* mouse bit-masks */
+/* mouse bit-masks */
#define BUTTON1_RELEASED 0x00000001L
#define BUTTON1_PRESSED 0x00000002L
@@ -180,9 +180,9 @@ extern "C"
#define BUTTON3_TRIPLE_CLICKED 0x00004000L
#define BUTTON3_MOVED 0x00004000L /* PDCurses */
- /* For the ncurses-compatible functions only, BUTTON4_PRESSED and
- BUTTON5_PRESSED are returned for mouse scroll wheel up and down;
- otherwise PDCurses doesn't support buttons 4 and 5 */
+/* For the ncurses-compatible functions only, BUTTON4_PRESSED and
+ BUTTON5_PRESSED are returned for mouse scroll wheel up and down;
+ otherwise PDCurses doesn't support buttons 4 and 5 */
#define BUTTON4_RELEASED 0x00008000L
#define BUTTON4_PRESSED 0x00010000L
@@ -204,17 +204,17 @@ extern "C"
#define ALL_MOUSE_EVENTS 0x1fffffffL
#define REPORT_MOUSE_POSITION 0x20000000L
- /* ncurses mouse interface */
+/* ncurses mouse interface */
- typedef unsigned long mmask_t;
+typedef unsigned long mmask_t;
- typedef struct
- {
- short id; /* unused, always 0 */
- int x, y, z; /* x, y same as MOUSE_STATUS; z unused */
- mmask_t bstate; /* equivalent to changes + button[], but
+typedef struct
+{
+ short id; /* unused, always 0 */
+ int x, y, z; /* x, y same as MOUSE_STATUS; z unused */
+ mmask_t bstate; /* equivalent to changes + button[], but
in the same format as used for mousemask() */
- } MEVENT;
+} MEVENT;
#ifdef NCURSES_MOUSE_VERSION
# define BUTTON_SHIFT BUTTON_MODIFIER_SHIFT
@@ -227,103 +227,103 @@ extern "C"
# define BUTTON_ALT PDC_BUTTON_ALT
#endif
- /*----------------------------------------------------------------------
- *
- * PDCurses Structure Definitions
- *
- */
-
- typedef struct _win /* definition of a window */
- {
- int _cury; /* current pseudo-cursor */
- int _curx;
- int _maxy; /* max window coordinates */
- int _maxx;
- int _begy; /* origin on screen */
- int _begx;
- int _flags; /* window properties */
- chtype _attrs; /* standard attributes and colors */
- chtype _bkgd; /* background, normally blank */
- bool _clear; /* causes clear at next refresh */
- bool _leaveit; /* leaves cursor where it is */
- bool _scroll; /* allows window scrolling */
- bool _nodelay; /* input character wait flag */
- bool _immed; /* immediate update flag */
- bool _sync; /* synchronise window ancestors */
- bool _use_keypad; /* flags keypad key mode active */
- chtype ** _y; /* pointer to line pointer array */
- int * _firstch; /* first changed character in line */
- int * _lastch; /* last changed character in line */
- int _tmarg; /* top of scrolling region */
- int _bmarg; /* bottom of scrolling region */
- int _delayms; /* milliseconds of delay for getch() */
- int _parx, _pary; /* coords relative to parent (0,0) */
- struct _win * _parent; /* subwin's pointer to parent win */
- } WINDOW;
-
- /* Avoid using the SCREEN struct directly -- use the corresponding
- functions if possible. This struct may eventually be made private. */
-
- typedef struct
- {
- bool alive; /* if initscr() called, and not endwin() */
- bool autocr; /* if cr -> lf */
- bool cbreak; /* if terminal unbuffered */
- bool echo; /* if terminal echo */
- bool raw_inp; /* raw input mode (v. cooked input) */
- bool raw_out; /* raw output mode (7 v. 8 bits) */
- bool audible; /* FALSE if the bell is visual */
- bool mono; /* TRUE if current screen is mono */
- bool resized; /* TRUE if TERM has been resized */
- bool orig_attr; /* TRUE if we have the original colors */
- short orig_fore; /* original screen foreground color */
- short orig_back; /* original screen foreground color */
- int cursrow; /* position of physical cursor */
- int curscol; /* position of physical cursor */
- int visibility; /* visibility of cursor */
- int orig_cursor; /* original cursor size */
- int lines; /* new value for LINES */
- int cols; /* new value for COLS */
- unsigned long _trap_mbe; /* trap these mouse button events */
- unsigned long _map_mbe_to_key; /* map mouse buttons to slk */
- int mouse_wait; /* time to wait (in ms) for a
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Structure Definitions
+ *
+ */
+
+typedef struct _win /* definition of a window */
+{
+ int _cury; /* current pseudo-cursor */
+ int _curx;
+ int _maxy; /* max window coordinates */
+ int _maxx;
+ int _begy; /* origin on screen */
+ int _begx;
+ int _flags; /* window properties */
+ chtype _attrs; /* standard attributes and colors */
+ chtype _bkgd; /* background, normally blank */
+ bool _clear; /* causes clear at next refresh */
+ bool _leaveit; /* leaves cursor where it is */
+ bool _scroll; /* allows window scrolling */
+ bool _nodelay; /* input character wait flag */
+ bool _immed; /* immediate update flag */
+ bool _sync; /* synchronise window ancestors */
+ bool _use_keypad; /* flags keypad key mode active */
+ chtype **_y; /* pointer to line pointer array */
+ int *_firstch; /* first changed character in line */
+ int *_lastch; /* last changed character in line */
+ int _tmarg; /* top of scrolling region */
+ int _bmarg; /* bottom of scrolling region */
+ int _delayms; /* milliseconds of delay for getch() */
+ int _parx, _pary; /* coords relative to parent (0,0) */
+ struct _win *_parent; /* subwin's pointer to parent win */
+} WINDOW;
+
+/* Avoid using the SCREEN struct directly -- use the corresponding
+ functions if possible. This struct may eventually be made private. */
+
+typedef struct
+{
+ bool alive; /* if initscr() called, and not endwin() */
+ bool autocr; /* if cr -> lf */
+ bool cbreak; /* if terminal unbuffered */
+ bool echo; /* if terminal echo */
+ bool raw_inp; /* raw input mode (v. cooked input) */
+ bool raw_out; /* raw output mode (7 v. 8 bits) */
+ bool audible; /* FALSE if the bell is visual */
+ bool mono; /* TRUE if current screen is mono */
+ bool resized; /* TRUE if TERM has been resized */
+ bool orig_attr; /* TRUE if we have the original colors */
+ short orig_fore; /* original screen foreground color */
+ short orig_back; /* original screen background color */
+ int cursrow; /* position of physical cursor */
+ int curscol; /* position of physical cursor */
+ int visibility; /* visibility of cursor */
+ int orig_cursor; /* original cursor size */
+ int lines; /* new value for LINES */
+ int cols; /* new value for COLS */
+ unsigned long _trap_mbe; /* trap these mouse button events */
+ unsigned long _map_mbe_to_key; /* map mouse buttons to slk */
+ int mouse_wait; /* time to wait (in ms) for a
button release after a press, in
order to count it as a click */
- int slklines; /* lines in use by slk_init() */
- WINDOW * slk_winptr; /* window for slk */
- int linesrippedoff; /* lines ripped off via ripoffline() */
- int linesrippedoffontop; /* lines ripped off on
+ int slklines; /* lines in use by slk_init() */
+ WINDOW *slk_winptr; /* window for slk */
+ int linesrippedoff; /* lines ripped off via ripoffline() */
+ int linesrippedoffontop; /* lines ripped off on
top via ripoffline() */
- int delaytenths; /* 1/10ths second to wait block
+ int delaytenths; /* 1/10ths second to wait block
getch() for */
- bool _preserve; /* TRUE if screen background
+ bool _preserve; /* TRUE if screen background
to be preserved */
- int _restore; /* specifies if screen background
+ int _restore; /* specifies if screen background
to be restored, and how */
- bool save_key_modifiers; /* TRUE if each key modifiers saved
+ bool save_key_modifiers; /* TRUE if each key modifiers saved
with each key press */
- bool return_key_modifiers; /* TRUE if modifier keys are
+ bool return_key_modifiers; /* TRUE if modifier keys are
returned as "real" keys */
- bool key_code; /* TRUE if last key is a special key;
+ bool key_code; /* TRUE if last key is a special key;
used internally by get_wch() */
#ifdef XCURSES
- int XcurscrSize; /* size of Xcurscr shared memory block */
- bool sb_on;
- int sb_viewport_y;
- int sb_viewport_x;
- int sb_total_y;
- int sb_total_x;
- int sb_cur_y;
- int sb_cur_x;
+ int XcurscrSize; /* size of Xcurscr shared memory block */
+ bool sb_on;
+ int sb_viewport_y;
+ int sb_viewport_x;
+ int sb_total_y;
+ int sb_total_x;
+ int sb_cur_y;
+ int sb_cur_x;
#endif
- short line_color; /* color of line attributes - default -1 */
- } SCREEN;
+ short line_color; /* color of line attributes - default -1 */
+} SCREEN;
- /*----------------------------------------------------------------------
- *
- * PDCurses External Variables
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses External Variables
+ *
+ */
#ifdef PDC_DLL_BUILD
# ifdef CURSES_LIBRARY
@@ -335,60 +335,60 @@ extern "C"
# define PDCEX extern
#endif
- PDCEX int LINES; /* terminal height */
- PDCEX int COLS; /* terminal width */
- PDCEX WINDOW * stdscr; /* the default screen window */
- PDCEX WINDOW * curscr; /* the current screen image */
- PDCEX SCREEN * SP; /* curses variables */
- PDCEX MOUSE_STATUS Mouse_status;
- PDCEX int COLORS;
- PDCEX int COLOR_PAIRS;
- PDCEX int TABSIZE;
- PDCEX chtype acs_map[]; /* alternate character set map */
- PDCEX char ttytype[]; /* terminal name/description */
+PDCEX int LINES; /* terminal height */
+PDCEX int COLS; /* terminal width */
+PDCEX WINDOW *stdscr; /* the default screen window */
+PDCEX WINDOW *curscr; /* the current screen image */
+PDCEX SCREEN *SP; /* curses variables */
+PDCEX MOUSE_STATUS Mouse_status;
+PDCEX int COLORS;
+PDCEX int COLOR_PAIRS;
+PDCEX int TABSIZE;
+PDCEX chtype acs_map[]; /* alternate character set map */
+PDCEX char ttytype[]; /* terminal name/description */
- /*man-start**************************************************************
+/*man-start**************************************************************
- PDCurses Text Attributes
- ========================
+PDCurses Text Attributes
+========================
- Originally, PDCurses used a short (16 bits) for its chtype. To include
- color, a number of things had to be sacrificed from the strict Unix and
- System V support. The main problem was fitting all character attributes
- and color into an unsigned char (all 8 bits!).
+Originally, PDCurses used a short (16 bits) for its chtype. To include
+color, a number of things had to be sacrificed from the strict Unix and
+System V support. The main problem was fitting all character attributes
+and color into an unsigned char (all 8 bits!).
- Today, PDCurses by default uses a long (32 bits) for its chtype, as in
- System V. The short chtype is still available, by undefining CHTYPE_LONG
- and rebuilding the library.
+Today, PDCurses by default uses a long (32 bits) for its chtype, as in
+System V. The short chtype is still available, by undefining CHTYPE_LONG
+and rebuilding the library.
- The following is the structure of a win->_attrs chtype:
+The following is the structure of a win->_attrs chtype:
- short form:
+short form:
- -------------------------------------------------
- |15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
- -------------------------------------------------
- color number | attrs | character eg 'a'
+-------------------------------------------------
+|15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0|
+-------------------------------------------------
+ color number | attrs | character eg 'a'
- The available non-color attributes are bold, reverse and blink. Others
- have no effect. The high order char is an index into an array of
- physical colors (defined in color.c) -- 32 foreground/background color
- pairs (5 bits) plus 3 bits for other attributes.
+The available non-color attributes are bold, reverse and blink. Others
+have no effect. The high order char is an index into an array of
+physical colors (defined in color.c) -- 32 foreground/background color
+pairs (5 bits) plus 3 bits for other attributes.
- long form:
+long form:
- ----------------------------------------------------------------------------
- |31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|..| 3| 2| 1| 0|
- ----------------------------------------------------------------------------
- color number | modifiers | character eg 'a'
+----------------------------------------------------------------------------
+|31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16|15|14|13|12|..| 3| 2| 1| 0|
+----------------------------------------------------------------------------
+ color number | modifiers | character eg 'a'
- The available non-color attributes are bold, underline, invisible,
- right-line, left-line, protect, reverse and blink. 256 color pairs (8
- bits), 8 bits for other attributes, and 16 bits for character data.
+The available non-color attributes are bold, underline, invisible,
+right-line, left-line, protect, reverse and blink. 256 color pairs (8
+bits), 8 bits for other attributes, and 16 bits for character data.
- **man-end****************************************************************/
+**man-end****************************************************************/
- /*** Video attribute macros ***/
+/*** Video attribute macros ***/
#define A_NORMAL (chtype)0
@@ -440,8 +440,8 @@ extern "C"
#define ATR_MSK A_ATTRIBUTES /* Obsolete */
#define ATR_NRM A_NORMAL /* Obsolete */
- /* For use with attr_t -- X/Open says, "these shall be distinct", so
- this is a non-conforming implementation. */
+/* For use with attr_t -- X/Open says, "these shall be distinct", so
+ this is a non-conforming implementation. */
#define WA_ALTCHARSET A_ALTCHARSET
#define WA_BLINK A_BLINK
@@ -460,11 +460,11 @@ extern "C"
#define WA_TOP A_NORMAL
#define WA_VERTICAL A_NORMAL
- /*** Alternate character set macros ***/
+/*** Alternate character set macros ***/
- /* 'w' = 32-bit chtype; acs_map[] index | A_ALTCHARSET
- 'n' = 16-bit chtype; it gets the fallback set because no bit is
- available for A_ALTCHARSET */
+/* 'w' = 32-bit chtype; acs_map[] index | A_ALTCHARSET
+ 'n' = 16-bit chtype; it gets the fallback set because no bit is
+ available for A_ALTCHARSET */
#ifdef CHTYPE_LONG
# define ACS_PICK(w, n) ((chtype)w | A_ALTCHARSET)
@@ -472,7 +472,7 @@ extern "C"
# define ACS_PICK(w, n) ((chtype)n)
#endif
- /* VT100-compatible symbols -- box chars */
+/* VT100-compatible symbols -- box chars */
#define ACS_ULCORNER ACS_PICK('l', '+')
#define ACS_LLCORNER ACS_PICK('m', '+')
@@ -486,7 +486,7 @@ extern "C"
#define ACS_VLINE ACS_PICK('x', '|')
#define ACS_PLUS ACS_PICK('n', '+')
- /* VT100-compatible symbols -- other */
+/* VT100-compatible symbols -- other */
#define ACS_S1 ACS_PICK('o', '-')
#define ACS_S9 ACS_PICK('s', '_')
@@ -496,9 +496,9 @@ extern "C"
#define ACS_PLMINUS ACS_PICK('g', '#')
#define ACS_BULLET ACS_PICK('~', 'o')
- /* Teletype 5410v1 symbols -- these are defined in SysV curses, but
- are not well-supported by most terminals. Stick to VT100 characters
- for optimum portability. */
+/* Teletype 5410v1 symbols -- these are defined in SysV curses, but
+ are not well-supported by most terminals. Stick to VT100 characters
+ for optimum portability. */
#define ACS_LARROW ACS_PICK(',', '<')
#define ACS_RARROW ACS_PICK('+', '>')
@@ -508,8 +508,8 @@ extern "C"
#define ACS_LANTERN ACS_PICK('i', '*')
#define ACS_BLOCK ACS_PICK('0', '#')
- /* That goes double for these -- undocumented SysV symbols. Don't use
- them. */
+/* That goes double for these -- undocumented SysV symbols. Don't use
+ them. */
#define ACS_S3 ACS_PICK('p', '-')
#define ACS_S7 ACS_PICK('r', '-')
@@ -519,7 +519,7 @@ extern "C"
#define ACS_NEQUAL ACS_PICK('|', '+')
#define ACS_STERLING ACS_PICK('}', 'L')
- /* Box char aliases */
+/* Box char aliases */
#define ACS_BSSB ACS_ULCORNER
#define ACS_SSBB ACS_LLCORNER
@@ -533,7 +533,7 @@ extern "C"
#define ACS_SBSB ACS_VLINE
#define ACS_SSSS ACS_PLUS
- /* cchar_t aliases */
+/* cchar_t aliases */
#ifdef PDC_WIDE
# define WACS_ULCORNER (&(acs_map['l']))
@@ -585,7 +585,7 @@ extern "C"
# define WACS_SSSS WACS_PLUS
#endif
- /*** Color macros ***/
+/*** Color macros ***/
#define COLOR_BLACK 0
@@ -605,12 +605,12 @@ extern "C"
#define COLOR_WHITE 7
- /*----------------------------------------------------------------------
- *
- * Function and Keypad Key Definitions.
- * Many are just for compatibility.
- *
- */
+/*----------------------------------------------------------------------
+ *
+ * Function and Keypad Key Definitions.
+ * Many are just for compatibility.
+ *
+ */
#define KEY_CODE_YES 0x100 /* If get_wch() gives a key code */
@@ -703,7 +703,7 @@ extern "C"
#define KEY_SUSPEND 0x195 /* suspend key */
#define KEY_UNDO 0x196 /* undo key */
- /* PDCurses-specific key definitions -- PC only */
+/* PDCurses-specific key definitions -- PC only */
#define ALT_0 0x197
#define ALT_1 0x198
@@ -861,479 +861,479 @@ extern "C"
#define KEY_F(n) (KEY_F0 + (n))
- /*----------------------------------------------------------------------
- *
- * PDCurses Function Declarations
- *
- */
-
- /* Standard */
-
- int addch ( const chtype );
- int addchnstr ( const chtype *, int );
- int addchstr ( const chtype * );
- int addnstr ( const char *, int );
- int addstr ( const char * );
- int attroff ( chtype );
- int attron ( chtype );
- int attrset ( chtype );
- int attr_get ( attr_t *, short *, void * );
- int attr_off ( attr_t, void * );
- int attr_on ( attr_t, void * );
- int attr_set ( attr_t, short, void * );
- int baudrate ( void );
- int beep ( void );
- int bkgd ( chtype );
- void bkgdset ( chtype );
- int border ( chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype );
- int box ( WINDOW *, chtype, chtype );
- bool can_change_color ( void );
- int cbreak ( void );
- int chgat ( int, attr_t, short, const void * );
- int clearok ( WINDOW *, bool );
- int clear ( void );
- int clrtobot ( void );
- int clrtoeol ( void );
- int color_content ( short, short *, short *, short * );
- int color_set ( short, void * );
- int copywin ( const WINDOW *, WINDOW *, int, int, int, int, int, int, int );
- int curs_set ( int );
- int def_prog_mode ( void );
- int def_shell_mode ( void );
- int delay_output ( int );
- int delch ( void );
- int deleteln ( void );
- void delscreen ( SCREEN * );
- int delwin ( WINDOW * );
- WINDOW * derwin ( WINDOW *, int, int, int, int );
- int doupdate ( void );
- WINDOW * dupwin ( WINDOW * );
- int echochar ( const chtype );
- int echo ( void );
- int endwin ( void );
- char erasechar ( void );
- int erase ( void );
- void filter ( void );
- int flash ( void );
- int flushinp ( void );
- chtype getbkgd ( WINDOW * );
- int getnstr ( char *, int );
- int getstr ( char * );
- WINDOW * getwin ( FILE * );
- int halfdelay ( int );
- bool has_colors ( void );
- bool has_ic ( void );
- bool has_il ( void );
- int hline ( chtype, int );
- void idcok ( WINDOW *, bool );
- int idlok ( WINDOW *, bool );
- void immedok ( WINDOW *, bool );
- int inchnstr ( chtype *, int );
- int inchstr ( chtype * );
- chtype inch ( void );
- int init_color ( short, short, short, short );
- int init_pair ( short, short, short );
- WINDOW * initscr ( void );
- int innstr ( char *, int );
- int insch ( chtype );
- int insdelln ( int );
- int insertln ( void );
- int insnstr ( const char *, int );
- int insstr ( const char * );
- int instr ( char * );
- int intrflush ( WINDOW *, bool );
- bool isendwin ( void );
- bool is_linetouched ( WINDOW *, int );
- bool is_wintouched ( WINDOW * );
- char * keyname ( int );
- int keypad ( WINDOW *, bool );
- char killchar ( void );
- int leaveok ( WINDOW *, bool );
- char * longname ( void );
- int meta ( WINDOW *, bool );
- int move ( int, int );
- int mvaddch ( int, int, const chtype );
- int mvaddchnstr ( int, int, const chtype *, int );
- int mvaddchstr ( int, int, const chtype * );
- int mvaddnstr ( int, int, const char *, int );
- int mvaddstr ( int, int, const char * );
- int mvchgat ( int, int, int, attr_t, short, const void * );
- int mvcur ( int, int, int, int );
- int mvdelch ( int, int );
- int mvderwin ( WINDOW *, int, int );
- int mvgetch ( int, int );
- int mvgetnstr ( int, int, char *, int );
- int mvgetstr ( int, int, char * );
- int mvhline ( int, int, chtype, int );
- chtype mvinch ( int, int );
- int mvinchnstr ( int, int, chtype *, int );
- int mvinchstr ( int, int, chtype * );
- int mvinnstr ( int, int, char *, int );
- int mvinsch ( int, int, chtype );
- int mvinsnstr ( int, int, const char *, int );
- int mvinsstr ( int, int, const char * );
- int mvinstr ( int, int, char * );
- int mvprintw ( int, int, const char *, ... );
- int mvscanw ( int, int, const char *, ... );
- int mvvline ( int, int, chtype, int );
- int mvwaddchnstr ( WINDOW *, int, int, const chtype *, int );
- int mvwaddchstr ( WINDOW *, int, int, const chtype * );
- int mvwaddch ( WINDOW *, int, int, const chtype );
- int mvwaddnstr ( WINDOW *, int, int, const char *, int );
- int mvwaddstr ( WINDOW *, int, int, const char * );
- int mvwchgat ( WINDOW *, int, int, int, attr_t, short, const void * );
- int mvwdelch ( WINDOW *, int, int );
- int mvwgetch ( WINDOW *, int, int );
- int mvwgetnstr ( WINDOW *, int, int, char *, int );
- int mvwgetstr ( WINDOW *, int, int, char * );
- int mvwhline ( WINDOW *, int, int, chtype, int );
- int mvwinchnstr ( WINDOW *, int, int, chtype *, int );
- int mvwinchstr ( WINDOW *, int, int, chtype * );
- chtype mvwinch ( WINDOW *, int, int );
- int mvwinnstr ( WINDOW *, int, int, char *, int );
- int mvwinsch ( WINDOW *, int, int, chtype );
- int mvwinsnstr ( WINDOW *, int, int, const char *, int );
- int mvwinsstr ( WINDOW *, int, int, const char * );
- int mvwinstr ( WINDOW *, int, int, char * );
- int mvwin ( WINDOW *, int, int );
- int mvwprintw ( WINDOW *, int, int, const char *, ... );
- int mvwscanw ( WINDOW *, int, int, const char *, ... );
- int mvwvline ( WINDOW *, int, int, chtype, int );
- int napms ( int );
- WINDOW * newpad ( int, int );
- SCREEN * newterm ( const char *, FILE *, FILE * );
- WINDOW * newwin ( int, int, int, int );
- int nl ( void );
- int nocbreak ( void );
- int nodelay ( WINDOW *, bool );
- int noecho ( void );
- int nonl ( void );
- void noqiflush ( void );
- int noraw ( void );
- int notimeout ( WINDOW *, bool );
- int overlay ( const WINDOW *, WINDOW * );
- int overwrite ( const WINDOW *, WINDOW * );
- int pair_content ( short, short *, short * );
- int pechochar ( WINDOW *, chtype );
- int pnoutrefresh ( WINDOW *, int, int, int, int, int, int );
- int prefresh ( WINDOW *, int, int, int, int, int, int );
- int printw ( const char *, ... );
- int putwin ( WINDOW *, FILE * );
- void qiflush ( void );
- int raw ( void );
- int redrawwin ( WINDOW * );
- int refresh ( void );
- int reset_prog_mode ( void );
- int reset_shell_mode ( void );
- int resetty ( void );
- int ripoffline ( int, int ( * ) ( WINDOW *, int ) );
- int savetty ( void );
- int scanw ( const char *, ... );
- int scr_dump ( const char * );
- int scr_init ( const char * );
- int scr_restore ( const char * );
- int scr_set ( const char * );
- int scrl ( int );
- int scroll ( WINDOW * );
- int scrollok ( WINDOW *, bool );
- SCREEN * set_term ( SCREEN * );
- int setscrreg ( int, int );
- int slk_attroff ( const chtype );
- int slk_attr_off ( const attr_t, void * );
- int slk_attron ( const chtype );
- int slk_attr_on ( const attr_t, void * );
- int slk_attrset ( const chtype );
- int slk_attr_set ( const attr_t, short, void * );
- int slk_clear ( void );
- int slk_color ( short );
- int slk_init ( int );
- char * slk_label ( int );
- int slk_noutrefresh ( void );
- int slk_refresh ( void );
- int slk_restore ( void );
- int slk_set ( int, const char *, int );
- int slk_touch ( void );
- int standend ( void );
- int standout ( void );
- int start_color ( void );
- WINDOW * subpad ( WINDOW *, int, int, int, int );
- WINDOW * subwin ( WINDOW *, int, int, int, int );
- int syncok ( WINDOW *, bool );
- chtype termattrs ( void );
- attr_t term_attrs ( void );
- char * termname ( void );
- void timeout ( int );
- int touchline ( WINDOW *, int, int );
- int touchwin ( WINDOW * );
- int typeahead ( int );
- int untouchwin ( WINDOW * );
- void use_env ( bool );
- int vidattr ( chtype );
- int vid_attr ( attr_t, short, void * );
- int vidputs ( chtype, int ( * ) ( int ) );
- int vid_puts ( attr_t, short, void *, int ( * ) ( int ) );
- int vline ( chtype, int );
- int vw_printw ( WINDOW *, const char *, va_list );
- int vwprintw ( WINDOW *, const char *, va_list );
- int vw_scanw ( WINDOW *, const char *, va_list );
- int vwscanw ( WINDOW *, const char *, va_list );
- int waddchnstr ( WINDOW *, const chtype *, int );
- int waddchstr ( WINDOW *, const chtype * );
- int waddch ( WINDOW *, const chtype );
- int waddnstr ( WINDOW *, const char *, int );
- int waddstr ( WINDOW *, const char * );
- int wattroff ( WINDOW *, chtype );
- int wattron ( WINDOW *, chtype );
- int wattrset ( WINDOW *, chtype );
- int wattr_get ( WINDOW *, attr_t *, short *, void * );
- int wattr_off ( WINDOW *, attr_t, void * );
- int wattr_on ( WINDOW *, attr_t, void * );
- int wattr_set ( WINDOW *, attr_t, short, void * );
- void wbkgdset ( WINDOW *, chtype );
- int wbkgd ( WINDOW *, chtype );
- int wborder ( WINDOW *, chtype, chtype, chtype, chtype,
- chtype, chtype, chtype, chtype );
- int wchgat ( WINDOW *, int, attr_t, short, const void * );
- int wclear ( WINDOW * );
- int wclrtobot ( WINDOW * );
- int wclrtoeol ( WINDOW * );
- int wcolor_set ( WINDOW *, short, void * );
- void wcursyncup ( WINDOW * );
- int wdelch ( WINDOW * );
- int wdeleteln ( WINDOW * );
- int wechochar ( WINDOW *, const chtype );
- int werase ( WINDOW * );
- int wgetch ( WINDOW * );
- int wgetnstr ( WINDOW *, char *, int );
- int wgetstr ( WINDOW *, char * );
- int whline ( WINDOW *, chtype, int );
- int winchnstr ( WINDOW *, chtype *, int );
- int winchstr ( WINDOW *, chtype * );
- chtype winch ( WINDOW * );
- int winnstr ( WINDOW *, char *, int );
- int winsch ( WINDOW *, chtype );
- int winsdelln ( WINDOW *, int );
- int winsertln ( WINDOW * );
- int winsnstr ( WINDOW *, const char *, int );
- int winsstr ( WINDOW *, const char * );
- int winstr ( WINDOW *, char * );
- int wmove ( WINDOW *, int, int );
- int wnoutrefresh ( WINDOW * );
- int wprintw ( WINDOW *, const char *, ... );
- int wredrawln ( WINDOW *, int, int );
- int wrefresh ( WINDOW * );
- int wscanw ( WINDOW *, const char *, ... );
- int wscrl ( WINDOW *, int );
- int wsetscrreg ( WINDOW *, int, int );
- int wstandend ( WINDOW * );
- int wstandout ( WINDOW * );
- void wsyncdown ( WINDOW * );
- void wsyncup ( WINDOW * );
- void wtimeout ( WINDOW *, int );
- int wtouchln ( WINDOW *, int, int, int );
- int wvline ( WINDOW *, chtype, int );
-
- /* Wide-character functions */
+/*----------------------------------------------------------------------
+ *
+ * PDCurses Function Declarations
+ *
+ */
+
+/* Standard */
+
+int addch ( const chtype );
+int addchnstr ( const chtype *, int );
+int addchstr ( const chtype * );
+int addnstr ( const char *, int );
+int addstr ( const char * );
+int attroff ( chtype );
+int attron ( chtype );
+int attrset ( chtype );
+int attr_get ( attr_t *, short *, void * );
+int attr_off ( attr_t, void * );
+int attr_on ( attr_t, void * );
+int attr_set ( attr_t, short, void * );
+int baudrate ( void );
+int beep ( void );
+int bkgd ( chtype );
+void bkgdset ( chtype );
+int border ( chtype, chtype, chtype, chtype, chtype, chtype, chtype, chtype );
+int box ( WINDOW *, chtype, chtype );
+bool can_change_color ( void );
+int cbreak ( void );
+int chgat ( int, attr_t, short, const void * );
+int clearok ( WINDOW *, bool );
+int clear ( void );
+int clrtobot ( void );
+int clrtoeol ( void );
+int color_content ( short, short *, short *, short * );
+int color_set ( short, void * );
+int copywin ( const WINDOW *, WINDOW *, int, int, int, int, int, int, int );
+int curs_set ( int );
+int def_prog_mode ( void );
+int def_shell_mode ( void );
+int delay_output ( int );
+int delch ( void );
+int deleteln ( void );
+void delscreen ( SCREEN * );
+int delwin ( WINDOW * );
+WINDOW *derwin ( WINDOW *, int, int, int, int );
+int doupdate ( void );
+WINDOW *dupwin ( WINDOW * );
+int echochar ( const chtype );
+int echo ( void );
+int endwin ( void );
+char erasechar ( void );
+int erase ( void );
+void filter ( void );
+int flash ( void );
+int flushinp ( void );
+chtype getbkgd ( WINDOW * );
+int getnstr ( char *, int );
+int getstr ( char * );
+WINDOW *getwin ( FILE * );
+int halfdelay ( int );
+bool has_colors ( void );
+bool has_ic ( void );
+bool has_il ( void );
+int hline ( chtype, int );
+void idcok ( WINDOW *, bool );
+int idlok ( WINDOW *, bool );
+void immedok ( WINDOW *, bool );
+int inchnstr ( chtype *, int );
+int inchstr ( chtype * );
+chtype inch ( void );
+int init_color ( short, short, short, short );
+int init_pair ( short, short, short );
+WINDOW *initscr ( void );
+int innstr ( char *, int );
+int insch ( chtype );
+int insdelln ( int );
+int insertln ( void );
+int insnstr ( const char *, int );
+int insstr ( const char * );
+int instr ( char * );
+int intrflush ( WINDOW *, bool );
+bool isendwin ( void );
+bool is_linetouched ( WINDOW *, int );
+bool is_wintouched ( WINDOW * );
+char *keyname ( int );
+int keypad ( WINDOW *, bool );
+char killchar ( void );
+int leaveok ( WINDOW *, bool );
+char *longname ( void );
+int meta ( WINDOW *, bool );
+int move ( int, int );
+int mvaddch ( int, int, const chtype );
+int mvaddchnstr ( int, int, const chtype *, int );
+int mvaddchstr ( int, int, const chtype * );
+int mvaddnstr ( int, int, const char *, int );
+int mvaddstr ( int, int, const char * );
+int mvchgat ( int, int, int, attr_t, short, const void * );
+int mvcur ( int, int, int, int );
+int mvdelch ( int, int );
+int mvderwin ( WINDOW *, int, int );
+int mvgetch ( int, int );
+int mvgetnstr ( int, int, char *, int );
+int mvgetstr ( int, int, char * );
+int mvhline ( int, int, chtype, int );
+chtype mvinch ( int, int );
+int mvinchnstr ( int, int, chtype *, int );
+int mvinchstr ( int, int, chtype * );
+int mvinnstr ( int, int, char *, int );
+int mvinsch ( int, int, chtype );
+int mvinsnstr ( int, int, const char *, int );
+int mvinsstr ( int, int, const char * );
+int mvinstr ( int, int, char * );
+int mvprintw ( int, int, const char *, ... );
+int mvscanw ( int, int, const char *, ... );
+int mvvline ( int, int, chtype, int );
+int mvwaddchnstr ( WINDOW *, int, int, const chtype *, int );
+int mvwaddchstr ( WINDOW *, int, int, const chtype * );
+int mvwaddch ( WINDOW *, int, int, const chtype );
+int mvwaddnstr ( WINDOW *, int, int, const char *, int );
+int mvwaddstr ( WINDOW *, int, int, const char * );
+int mvwchgat ( WINDOW *, int, int, int, attr_t, short, const void * );
+int mvwdelch ( WINDOW *, int, int );
+int mvwgetch ( WINDOW *, int, int );
+int mvwgetnstr ( WINDOW *, int, int, char *, int );
+int mvwgetstr ( WINDOW *, int, int, char * );
+int mvwhline ( WINDOW *, int, int, chtype, int );
+int mvwinchnstr ( WINDOW *, int, int, chtype *, int );
+int mvwinchstr ( WINDOW *, int, int, chtype * );
+chtype mvwinch ( WINDOW *, int, int );
+int mvwinnstr ( WINDOW *, int, int, char *, int );
+int mvwinsch ( WINDOW *, int, int, chtype );
+int mvwinsnstr ( WINDOW *, int, int, const char *, int );
+int mvwinsstr ( WINDOW *, int, int, const char * );
+int mvwinstr ( WINDOW *, int, int, char * );
+int mvwin ( WINDOW *, int, int );
+int mvwprintw ( WINDOW *, int, int, const char *, ... );
+int mvwscanw ( WINDOW *, int, int, const char *, ... );
+int mvwvline ( WINDOW *, int, int, chtype, int );
+int napms ( int );
+WINDOW *newpad ( int, int );
+SCREEN *newterm ( const char *, FILE *, FILE * );
+WINDOW *newwin ( int, int, int, int );
+int nl ( void );
+int nocbreak ( void );
+int nodelay ( WINDOW *, bool );
+int noecho ( void );
+int nonl ( void );
+void noqiflush ( void );
+int noraw ( void );
+int notimeout ( WINDOW *, bool );
+int overlay ( const WINDOW *, WINDOW * );
+int overwrite ( const WINDOW *, WINDOW * );
+int pair_content ( short, short *, short * );
+int pechochar ( WINDOW *, chtype );
+int pnoutrefresh ( WINDOW *, int, int, int, int, int, int );
+int prefresh ( WINDOW *, int, int, int, int, int, int );
+int printw ( const char *, ... );
+int putwin ( WINDOW *, FILE * );
+void qiflush ( void );
+int raw ( void );
+int redrawwin ( WINDOW * );
+int refresh ( void );
+int reset_prog_mode ( void );
+int reset_shell_mode ( void );
+int resetty ( void );
+int ripoffline ( int, int ( * ) ( WINDOW *, int ) );
+int savetty ( void );
+int scanw ( const char *, ... );
+int scr_dump ( const char * );
+int scr_init ( const char * );
+int scr_restore ( const char * );
+int scr_set ( const char * );
+int scrl ( int );
+int scroll ( WINDOW * );
+int scrollok ( WINDOW *, bool );
+SCREEN *set_term ( SCREEN * );
+int setscrreg ( int, int );
+int slk_attroff ( const chtype );
+int slk_attr_off ( const attr_t, void * );
+int slk_attron ( const chtype );
+int slk_attr_on ( const attr_t, void * );
+int slk_attrset ( const chtype );
+int slk_attr_set ( const attr_t, short, void * );
+int slk_clear ( void );
+int slk_color ( short );
+int slk_init ( int );
+char *slk_label ( int );
+int slk_noutrefresh ( void );
+int slk_refresh ( void );
+int slk_restore ( void );
+int slk_set ( int, const char *, int );
+int slk_touch ( void );
+int standend ( void );
+int standout ( void );
+int start_color ( void );
+WINDOW *subpad ( WINDOW *, int, int, int, int );
+WINDOW *subwin ( WINDOW *, int, int, int, int );
+int syncok ( WINDOW *, bool );
+chtype termattrs ( void );
+attr_t term_attrs ( void );
+char *termname ( void );
+void timeout ( int );
+int touchline ( WINDOW *, int, int );
+int touchwin ( WINDOW * );
+int typeahead ( int );
+int untouchwin ( WINDOW * );
+void use_env ( bool );
+int vidattr ( chtype );
+int vid_attr ( attr_t, short, void * );
+int vidputs ( chtype, int ( * ) ( int ) );
+int vid_puts ( attr_t, short, void *, int ( * ) ( int ) );
+int vline ( chtype, int );
+int vw_printw ( WINDOW *, const char *, va_list );
+int vwprintw ( WINDOW *, const char *, va_list );
+int vw_scanw ( WINDOW *, const char *, va_list );
+int vwscanw ( WINDOW *, const char *, va_list );
+int waddchnstr ( WINDOW *, const chtype *, int );
+int waddchstr ( WINDOW *, const chtype * );
+int waddch ( WINDOW *, const chtype );
+int waddnstr ( WINDOW *, const char *, int );
+int waddstr ( WINDOW *, const char * );
+int wattroff ( WINDOW *, chtype );
+int wattron ( WINDOW *, chtype );
+int wattrset ( WINDOW *, chtype );
+int wattr_get ( WINDOW *, attr_t *, short *, void * );
+int wattr_off ( WINDOW *, attr_t, void * );
+int wattr_on ( WINDOW *, attr_t, void * );
+int wattr_set ( WINDOW *, attr_t, short, void * );
+void wbkgdset ( WINDOW *, chtype );
+int wbkgd ( WINDOW *, chtype );
+int wborder ( WINDOW *, chtype, chtype, chtype, chtype,
+ chtype, chtype, chtype, chtype );
+int wchgat ( WINDOW *, int, attr_t, short, const void * );
+int wclear ( WINDOW * );
+int wclrtobot ( WINDOW * );
+int wclrtoeol ( WINDOW * );
+int wcolor_set ( WINDOW *, short, void * );
+void wcursyncup ( WINDOW * );
+int wdelch ( WINDOW * );
+int wdeleteln ( WINDOW * );
+int wechochar ( WINDOW *, const chtype );
+int werase ( WINDOW * );
+int wgetch ( WINDOW * );
+int wgetnstr ( WINDOW *, char *, int );
+int wgetstr ( WINDOW *, char * );
+int whline ( WINDOW *, chtype, int );
+int winchnstr ( WINDOW *, chtype *, int );
+int winchstr ( WINDOW *, chtype * );
+chtype winch ( WINDOW * );
+int winnstr ( WINDOW *, char *, int );
+int winsch ( WINDOW *, chtype );
+int winsdelln ( WINDOW *, int );
+int winsertln ( WINDOW * );
+int winsnstr ( WINDOW *, const char *, int );
+int winsstr ( WINDOW *, const char * );
+int winstr ( WINDOW *, char * );
+int wmove ( WINDOW *, int, int );
+int wnoutrefresh ( WINDOW * );
+int wprintw ( WINDOW *, const char *, ... );
+int wredrawln ( WINDOW *, int, int );
+int wrefresh ( WINDOW * );
+int wscanw ( WINDOW *, const char *, ... );
+int wscrl ( WINDOW *, int );
+int wsetscrreg ( WINDOW *, int, int );
+int wstandend ( WINDOW * );
+int wstandout ( WINDOW * );
+void wsyncdown ( WINDOW * );
+void wsyncup ( WINDOW * );
+void wtimeout ( WINDOW *, int );
+int wtouchln ( WINDOW *, int, int, int );
+int wvline ( WINDOW *, chtype, int );
+
+/* Wide-character functions */
#ifdef PDC_WIDE
- int addnwstr ( const wchar_t *, int );
- int addwstr ( const wchar_t * );
- int add_wch ( const cchar_t * );
- int add_wchnstr ( const cchar_t *, int );
- int add_wchstr ( const cchar_t * );
- int border_set ( const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t * );
- int box_set ( WINDOW *, const cchar_t *, const cchar_t * );
- int echo_wchar ( const cchar_t * );
- int erasewchar ( wchar_t * );
- int getbkgrnd ( cchar_t * );
- int getcchar ( const cchar_t *, wchar_t *, attr_t *, short *, void * );
- int getn_wstr ( wint_t *, int );
- int get_wch ( wint_t * );
- int get_wstr ( wint_t * );
- int hline_set ( const cchar_t *, int );
- int innwstr ( wchar_t *, int );
- int ins_nwstr ( const wchar_t *, int );
- int ins_wch ( const cchar_t * );
- int ins_wstr ( const wchar_t * );
- int inwstr ( wchar_t * );
- int in_wch ( cchar_t * );
- int in_wchnstr ( cchar_t *, int );
- int in_wchstr ( cchar_t * );
- char * key_name ( wchar_t );
- int killwchar ( wchar_t * );
- int mvaddnwstr ( int, int, const wchar_t *, int );
- int mvaddwstr ( int, int, const wchar_t * );
- int mvadd_wch ( int, int, const cchar_t * );
- int mvadd_wchnstr ( int, int, const cchar_t *, int );
- int mvadd_wchstr ( int, int, const cchar_t * );
- int mvgetn_wstr ( int, int, wint_t *, int );
- int mvget_wch ( int, int, wint_t * );
- int mvget_wstr ( int, int, wint_t * );
- int mvhline_set ( int, int, const cchar_t *, int );
- int mvinnwstr ( int, int, wchar_t *, int );
- int mvins_nwstr ( int, int, const wchar_t *, int );
- int mvins_wch ( int, int, const cchar_t * );
- int mvins_wstr ( int, int, const wchar_t * );
- int mvinwstr ( int, int, wchar_t * );
- int mvin_wch ( int, int, cchar_t * );
- int mvin_wchnstr ( int, int, cchar_t *, int );
- int mvin_wchstr ( int, int, cchar_t * );
- int mvvline_set ( int, int, const cchar_t *, int );
- int mvwaddnwstr ( WINDOW *, int, int, const wchar_t *, int );
- int mvwaddwstr ( WINDOW *, int, int, const wchar_t * );
- int mvwadd_wch ( WINDOW *, int, int, const cchar_t * );
- int mvwadd_wchnstr ( WINDOW *, int, int, const cchar_t *, int );
- int mvwadd_wchstr ( WINDOW *, int, int, const cchar_t * );
- int mvwgetn_wstr ( WINDOW *, int, int, wint_t *, int );
- int mvwget_wch ( WINDOW *, int, int, wint_t * );
- int mvwget_wstr ( WINDOW *, int, int, wint_t * );
- int mvwhline_set ( WINDOW *, int, int, const cchar_t *, int );
- int mvwinnwstr ( WINDOW *, int, int, wchar_t *, int );
- int mvwins_nwstr ( WINDOW *, int, int, const wchar_t *, int );
- int mvwins_wch ( WINDOW *, int, int, const cchar_t * );
- int mvwins_wstr ( WINDOW *, int, int, const wchar_t * );
- int mvwin_wch ( WINDOW *, int, int, cchar_t * );
- int mvwin_wchnstr ( WINDOW *, int, int, cchar_t *, int );
- int mvwin_wchstr ( WINDOW *, int, int, cchar_t * );
- int mvwinwstr ( WINDOW *, int, int, wchar_t * );
- int mvwvline_set ( WINDOW *, int, int, const cchar_t *, int );
- int pecho_wchar ( WINDOW *, const cchar_t * );
- int setcchar ( cchar_t *, const wchar_t *, const attr_t, short, const void * );
- int slk_wset ( int, const wchar_t *, int );
- int unget_wch ( const wchar_t );
- int vline_set ( const cchar_t *, int );
- int waddnwstr ( WINDOW *, const wchar_t *, int );
- int waddwstr ( WINDOW *, const wchar_t * );
- int wadd_wch ( WINDOW *, const cchar_t * );
- int wadd_wchnstr ( WINDOW *, const cchar_t *, int );
- int wadd_wchstr ( WINDOW *, const cchar_t * );
- int wbkgrnd ( WINDOW *, const cchar_t * );
- void wbkgrndset ( WINDOW *, const cchar_t * );
- int wborder_set ( WINDOW *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t *,
- const cchar_t *, const cchar_t *, const cchar_t * );
- int wecho_wchar ( WINDOW *, const cchar_t * );
- int wgetbkgrnd ( WINDOW *, cchar_t * );
- int wgetn_wstr ( WINDOW *, wint_t *, int );
- int wget_wch ( WINDOW *, wint_t * );
- int wget_wstr ( WINDOW *, wint_t * );
- int whline_set ( WINDOW *, const cchar_t *, int );
- int winnwstr ( WINDOW *, wchar_t *, int );
- int wins_nwstr ( WINDOW *, const wchar_t *, int );
- int wins_wch ( WINDOW *, const cchar_t * );
- int wins_wstr ( WINDOW *, const wchar_t * );
- int winwstr ( WINDOW *, wchar_t * );
- int win_wch ( WINDOW *, cchar_t * );
- int win_wchnstr ( WINDOW *, cchar_t *, int );
- int win_wchstr ( WINDOW *, cchar_t * );
- wchar_t * wunctrl ( cchar_t * );
- int wvline_set ( WINDOW *, const cchar_t *, int );
+int addnwstr ( const wchar_t *, int );
+int addwstr ( const wchar_t * );
+int add_wch ( const cchar_t * );
+int add_wchnstr ( const cchar_t *, int );
+int add_wchstr ( const cchar_t * );
+int border_set ( const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t * );
+int box_set ( WINDOW *, const cchar_t *, const cchar_t * );
+int echo_wchar ( const cchar_t * );
+int erasewchar ( wchar_t * );
+int getbkgrnd ( cchar_t * );
+int getcchar ( const cchar_t *, wchar_t *, attr_t *, short *, void * );
+int getn_wstr ( wint_t *, int );
+int get_wch ( wint_t * );
+int get_wstr ( wint_t * );
+int hline_set ( const cchar_t *, int );
+int innwstr ( wchar_t *, int );
+int ins_nwstr ( const wchar_t *, int );
+int ins_wch ( const cchar_t * );
+int ins_wstr ( const wchar_t * );
+int inwstr ( wchar_t * );
+int in_wch ( cchar_t * );
+int in_wchnstr ( cchar_t *, int );
+int in_wchstr ( cchar_t * );
+char *key_name ( wchar_t );
+int killwchar ( wchar_t * );
+int mvaddnwstr ( int, int, const wchar_t *, int );
+int mvaddwstr ( int, int, const wchar_t * );
+int mvadd_wch ( int, int, const cchar_t * );
+int mvadd_wchnstr ( int, int, const cchar_t *, int );
+int mvadd_wchstr ( int, int, const cchar_t * );
+int mvgetn_wstr ( int, int, wint_t *, int );
+int mvget_wch ( int, int, wint_t * );
+int mvget_wstr ( int, int, wint_t * );
+int mvhline_set ( int, int, const cchar_t *, int );
+int mvinnwstr ( int, int, wchar_t *, int );
+int mvins_nwstr ( int, int, const wchar_t *, int );
+int mvins_wch ( int, int, const cchar_t * );
+int mvins_wstr ( int, int, const wchar_t * );
+int mvinwstr ( int, int, wchar_t * );
+int mvin_wch ( int, int, cchar_t * );
+int mvin_wchnstr ( int, int, cchar_t *, int );
+int mvin_wchstr ( int, int, cchar_t * );
+int mvvline_set ( int, int, const cchar_t *, int );
+int mvwaddnwstr ( WINDOW *, int, int, const wchar_t *, int );
+int mvwaddwstr ( WINDOW *, int, int, const wchar_t * );
+int mvwadd_wch ( WINDOW *, int, int, const cchar_t * );
+int mvwadd_wchnstr ( WINDOW *, int, int, const cchar_t *, int );
+int mvwadd_wchstr ( WINDOW *, int, int, const cchar_t * );
+int mvwgetn_wstr ( WINDOW *, int, int, wint_t *, int );
+int mvwget_wch ( WINDOW *, int, int, wint_t * );
+int mvwget_wstr ( WINDOW *, int, int, wint_t * );
+int mvwhline_set ( WINDOW *, int, int, const cchar_t *, int );
+int mvwinnwstr ( WINDOW *, int, int, wchar_t *, int );
+int mvwins_nwstr ( WINDOW *, int, int, const wchar_t *, int );
+int mvwins_wch ( WINDOW *, int, int, const cchar_t * );
+int mvwins_wstr ( WINDOW *, int, int, const wchar_t * );
+int mvwin_wch ( WINDOW *, int, int, cchar_t * );
+int mvwin_wchnstr ( WINDOW *, int, int, cchar_t *, int );
+int mvwin_wchstr ( WINDOW *, int, int, cchar_t * );
+int mvwinwstr ( WINDOW *, int, int, wchar_t * );
+int mvwvline_set ( WINDOW *, int, int, const cchar_t *, int );
+int pecho_wchar ( WINDOW *, const cchar_t * );
+int setcchar ( cchar_t *, const wchar_t *, const attr_t, short, const void * );
+int slk_wset ( int, const wchar_t *, int );
+int unget_wch ( const wchar_t );
+int vline_set ( const cchar_t *, int );
+int waddnwstr ( WINDOW *, const wchar_t *, int );
+int waddwstr ( WINDOW *, const wchar_t * );
+int wadd_wch ( WINDOW *, const cchar_t * );
+int wadd_wchnstr ( WINDOW *, const cchar_t *, int );
+int wadd_wchstr ( WINDOW *, const cchar_t * );
+int wbkgrnd ( WINDOW *, const cchar_t * );
+void wbkgrndset ( WINDOW *, const cchar_t * );
+int wborder_set ( WINDOW *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t *,
+ const cchar_t *, const cchar_t *, const cchar_t * );
+int wecho_wchar ( WINDOW *, const cchar_t * );
+int wgetbkgrnd ( WINDOW *, cchar_t * );
+int wgetn_wstr ( WINDOW *, wint_t *, int );
+int wget_wch ( WINDOW *, wint_t * );
+int wget_wstr ( WINDOW *, wint_t * );
+int whline_set ( WINDOW *, const cchar_t *, int );
+int winnwstr ( WINDOW *, wchar_t *, int );
+int wins_nwstr ( WINDOW *, const wchar_t *, int );
+int wins_wch ( WINDOW *, const cchar_t * );
+int wins_wstr ( WINDOW *, const wchar_t * );
+int winwstr ( WINDOW *, wchar_t * );
+int win_wch ( WINDOW *, cchar_t * );
+int win_wchnstr ( WINDOW *, cchar_t *, int );
+int win_wchstr ( WINDOW *, cchar_t * );
+wchar_t *wunctrl ( cchar_t * );
+int wvline_set ( WINDOW *, const cchar_t *, int );
#endif
- /* Quasi-standard */
-
- chtype getattrs ( WINDOW * );
- int getbegx ( WINDOW * );
- int getbegy ( WINDOW * );
- int getmaxx ( WINDOW * );
- int getmaxy ( WINDOW * );
- int getparx ( WINDOW * );
- int getpary ( WINDOW * );
- int getcurx ( WINDOW * );
- int getcury ( WINDOW * );
- void traceoff ( void );
- void traceon ( void );
- char * unctrl ( chtype );
-
- int crmode ( void );
- int nocrmode ( void );
- int draino ( int );
- int resetterm ( void );
- int fixterm ( void );
- int saveterm ( void );
- int setsyx ( int, int );
-
- int mouse_set ( unsigned long );
- int mouse_on ( unsigned long );
- int mouse_off ( unsigned long );
- int request_mouse_pos ( void );
- int map_button ( unsigned long );
- void wmouse_position ( WINDOW *, int *, int * );
- unsigned long getmouse ( void );
- unsigned long getbmap ( void );
-
- /* ncurses */
-
- int assume_default_colors ( int, int );
- const char * curses_version ( void );
- bool has_key ( int );
- int use_default_colors ( void );
- int wresize ( WINDOW *, int, int );
-
- int mouseinterval ( int );
- mmask_t mousemask ( mmask_t, mmask_t * );
- bool mouse_trafo ( int *, int *, bool );
- int nc_getmouse ( MEVENT * );
- int ungetmouse ( MEVENT * );
- bool wenclose ( const WINDOW *, int, int );
- bool wmouse_trafo ( const WINDOW *, int *, int *, bool );
-
- /* PDCurses */
-
- int addrawch ( chtype );
- int insrawch ( chtype );
- bool is_termresized ( void );
- int mvaddrawch ( int, int, chtype );
- int mvdeleteln ( int, int );
- int mvinsertln ( int, int );
- int mvinsrawch ( int, int, chtype );
- int mvwaddrawch ( WINDOW *, int, int, chtype );
- int mvwdeleteln ( WINDOW *, int, int );
- int mvwinsertln ( WINDOW *, int, int );
- int mvwinsrawch ( WINDOW *, int, int, chtype );
- int raw_output ( bool );
- int resize_term ( int, int );
- WINDOW * resize_window ( WINDOW *, int, int );
- int waddrawch ( WINDOW *, chtype );
- int winsrawch ( WINDOW *, chtype );
- char wordchar ( void );
+/* Quasi-standard */
+
+chtype getattrs ( WINDOW * );
+int getbegx ( WINDOW * );
+int getbegy ( WINDOW * );
+int getmaxx ( WINDOW * );
+int getmaxy ( WINDOW * );
+int getparx ( WINDOW * );
+int getpary ( WINDOW * );
+int getcurx ( WINDOW * );
+int getcury ( WINDOW * );
+void traceoff ( void );
+void traceon ( void );
+char *unctrl ( chtype );
+
+int crmode ( void );
+int nocrmode ( void );
+int draino ( int );
+int resetterm ( void );
+int fixterm ( void );
+int saveterm ( void );
+int setsyx ( int, int );
+
+int mouse_set ( unsigned long );
+int mouse_on ( unsigned long );
+int mouse_off ( unsigned long );
+int request_mouse_pos ( void );
+int map_button ( unsigned long );
+void wmouse_position ( WINDOW *, int *, int * );
+unsigned long getmouse ( void );
+unsigned long getbmap ( void );
+
+/* ncurses */
+
+int assume_default_colors ( int, int );
+const char *curses_version ( void );
+bool has_key ( int );
+int use_default_colors ( void );
+int wresize ( WINDOW *, int, int );
+
+int mouseinterval ( int );
+mmask_t mousemask ( mmask_t, mmask_t * );
+bool mouse_trafo ( int *, int *, bool );
+int nc_getmouse ( MEVENT * );
+int ungetmouse ( MEVENT * );
+bool wenclose ( const WINDOW *, int, int );
+bool wmouse_trafo ( const WINDOW *, int *, int *, bool );
+
+/* PDCurses */
+
+int addrawch ( chtype );
+int insrawch ( chtype );
+bool is_termresized ( void );
+int mvaddrawch ( int, int, chtype );
+int mvdeleteln ( int, int );
+int mvinsertln ( int, int );
+int mvinsrawch ( int, int, chtype );
+int mvwaddrawch ( WINDOW *, int, int, chtype );
+int mvwdeleteln ( WINDOW *, int, int );
+int mvwinsertln ( WINDOW *, int, int );
+int mvwinsrawch ( WINDOW *, int, int, chtype );
+int raw_output ( bool );
+int resize_term ( int, int );
+WINDOW *resize_window ( WINDOW *, int, int );
+int waddrawch ( WINDOW *, chtype );
+int winsrawch ( WINDOW *, chtype );
+char wordchar ( void );
#ifdef PDC_WIDE
- wchar_t * slk_wlabel ( int );
+wchar_t *slk_wlabel ( int );
#endif
- void PDC_debug ( const char *, ... );
- int PDC_ungetch ( int );
- int PDC_set_blink ( bool );
- int PDC_set_line_color ( short );
- void PDC_set_title ( const char * );
+void PDC_debug ( const char *, ... );
+int PDC_ungetch ( int );
+int PDC_set_blink ( bool );
+int PDC_set_line_color ( short );
+void PDC_set_title ( const char * );
- int PDC_clearclipboard ( void );
- int PDC_freeclipboard ( char * );
- int PDC_getclipboard ( char **, long * );
- int PDC_setclipboard ( const char *, long );
+int PDC_clearclipboard ( void );
+int PDC_freeclipboard ( char * );
+int PDC_getclipboard ( char **, long * );
+int PDC_setclipboard ( const char *, long );
- unsigned long PDC_get_input_fd ( void );
- unsigned long PDC_get_key_modifiers ( void );
- int PDC_return_key_modifiers ( bool );
- int PDC_save_key_modifiers ( bool );
+unsigned long PDC_get_input_fd ( void );
+unsigned long PDC_get_key_modifiers ( void );
+int PDC_return_key_modifiers ( bool );
+int PDC_save_key_modifiers ( bool );
#ifdef XCURSES
- WINDOW * Xinitscr ( int, char ** );
- void XCursesExit ( void );
- int sb_init ( void );
- int sb_set_horz ( int, int, int );
- int sb_set_vert ( int, int, int );
- int sb_get_horz ( int *, int *, int * );
- int sb_get_vert ( int *, int *, int * );
- int sb_refresh ( void );
+WINDOW *Xinitscr ( int, char ** );
+void XCursesExit ( void );
+int sb_init ( void );
+int sb_set_horz ( int, int, int );
+int sb_set_vert ( int, int, int );
+int sb_get_horz ( int *, int *, int * );
+int sb_get_vert ( int *, int *, int * );
+int sb_refresh ( void );
#endif
- /*** Functions defined as macros ***/
+/*** Functions defined as macros ***/
- /* getch() and ungetch() conflict with some DOS libraries */
+/* getch() and ungetch() conflict with some DOS libraries */
#define getch() wgetch(stdscr)
#define ungetch(ch) PDC_ungetch(ch)
@@ -1341,7 +1341,7 @@ extern "C"
#define COLOR_PAIR(n) (((chtype)(n) << PDC_COLOR_SHIFT) & A_COLOR)
#define PAIR_NUMBER(n) (((n) & A_COLOR) >> PDC_COLOR_SHIFT)
- /* These will _only_ work as macros */
+/* These will _only_ work as macros */
#define getbegyx(w, y, x) (y = getbegy(w), x = getbegx(w))
#define getmaxyx(w, y, x) (y = getmaxy(w), x = getmaxx(w))
@@ -1349,20 +1349,20 @@ extern "C"
#define getyx(w, y, x) (y = getcury(w), x = getcurx(w))
#define getsyx(y, x) { if (curscr->_leaveit) (y)=(x)=-1; \
- else getyx(curscr,(y),(x)); }
+ else getyx(curscr,(y),(x)); }
#ifdef NCURSES_MOUSE_VERSION
# define getmouse(x) nc_getmouse(x)
#endif
- /* return codes from PDC_getclipboard() and PDC_setclipboard() calls */
+/* return codes from PDC_getclipboard() and PDC_setclipboard() calls */
#define PDC_CLIP_SUCCESS 0
#define PDC_CLIP_ACCESS_ERROR 1
#define PDC_CLIP_EMPTY 2
#define PDC_CLIP_MEMORY_ERROR 3
- /* PDCurses key modifier masks */
+/* PDCurses key modifier masks */
#define PDC_KEY_MODIFIER_SHIFT 1
#define PDC_KEY_MODIFIER_CONTROL 2
diff --git a/standardPregraph/iterate.c b/standardPregraph/iterate.c
index 610b253..06bda4d 100644
--- a/standardPregraph/iterate.c
+++ b/standardPregraph/iterate.c
@@ -1,7 +1,7 @@
/*
* iterate.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,10 +26,10 @@
#include "extfunc.h"
#include "extvab.h"
-static Kmer * kmerBuffer;
-static ubyte8 * hashBanBuffer;
+static Kmer *kmerBuffer;
+static ubyte8 *hashBanBuffer;
-static char * flagBuffer;
+static char *flagBuffer;
static int buffer_size = 100000000;//2013-5-13
long long foundcount = 0;
long long notfoundcount = 0;
@@ -37,130 +37,130 @@ long long newfoundcount = 0;
long long newnotfoundcount = 0;
long long edgeaddnumber = 0;
-unsigned int ** arcBuffer;
+unsigned int **arcBuffer;
unsigned int arcBufferCount = 0;
-unsigned int ** delarcBuffer;
+unsigned int **delarcBuffer;
unsigned int delarcBufferCount = 0;
static void forward ( unsigned int index, int first );
//Fresh preGraphBasic to record the original message and the multikmer message.
-void freshpreGraphBasic ( boolean iterate, int maxk, char * graph )
+void freshpreGraphBasic ( boolean iterate, int maxk, char *graph )
{
- char name[256], line[256];
- FILE * fp;
- int num_kmer;
- char ch;
- int min = 0, max = 0;
- int numed = 0;
- int maxreadlen = 0, minreadlen = 0, maxnamelen = 0;
- sprintf ( name, "%s.preGraphBasic", graph );
- fp = ckopen ( name, "r" );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'V' )
- {
- sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &min );
- }
- else if ( line[0] == 'E' )
- {
- sscanf ( line + 5, "%d", &numed );
- }
- else if ( line[0] == 'M' )
- {
- sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxreadlen, &minreadlen, &maxnamelen );
- }
- else if ( line[0] == 'B' )
- {
- if ( line[7] == 'V' )
- {
- sscanf ( line, "Backup VERTEX %d %c %d", &num_kmer, &ch, &min );
- }
- else if ( line[7] == 'E' )
- {
- sscanf ( line, "Backup EDGEs %d", &numed );
- }
- else if ( line[7] == 'M' )
- {
- sscanf ( line, "Backup MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxreadlen, &minreadlen, &maxnamelen );
- }
- }
- }
-
- fclose ( fp );
- sprintf ( name, "%s.preGraphBasic", graph );
- fp = ckopen ( name, "w" );
-
- if ( iterate )
- {
- fprintf ( fp, "VERTEX %d K %d\n", num_vt, maxk );
- fprintf ( fp, "\nEDGEs %d\n", num_ed );
- fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
- fprintf ( fp, "\nBackup VERTEX %d K %d\n", num_kmer, min );
- fprintf ( fp, "\nBackup EDGEs %d\n", numed );
- fprintf ( fp, "\nBackup MaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
- }
- else
- {
- fprintf ( fp, "VERTEX %d K %d\n", num_kmer, min );
- fprintf ( fp, "\nEDGEs %d\n", numed );
- fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
- }
-
- fclose ( fp );
+ char name[256], line[256];
+ FILE *fp;
+ int num_kmer;
+ char ch;
+ int min = 0, max = 0;
+ int numed = 0;
+ int maxreadlen = 0, minreadlen = 0, maxnamelen = 0;
+ sprintf ( name, "%s.preGraphBasic", graph );
+ fp = ckopen ( name, "r" );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'V' )
+ {
+ sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &min );
+ }
+ else if ( line[0] == 'E' )
+ {
+ sscanf ( line + 5, "%d", &numed );
+ }
+ else if ( line[0] == 'M' )
+ {
+ sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxreadlen, &minreadlen, &maxnamelen );
+ }
+ else if ( line[0] == 'B' )
+ {
+ if ( line[7] == 'V' )
+ {
+ sscanf ( line, "Backup VERTEX %d %c %d", &num_kmer, &ch, &min );
+ }
+ else if ( line[7] == 'E' )
+ {
+ sscanf ( line, "Backup EDGEs %d", &numed );
+ }
+ else if ( line[7] == 'M' )
+ {
+ sscanf ( line, "Backup MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxreadlen, &minreadlen, &maxnamelen );
+ }
+ }
+ }
+
+ fclose ( fp );
+ sprintf ( name, "%s.preGraphBasic", graph );
+ fp = ckopen ( name, "w" );
+
+ if ( iterate )
+ {
+ fprintf ( fp, "VERTEX %d K %d\n", num_vt, maxk );
+ fprintf ( fp, "\nEDGEs %d\n", num_ed );
+ fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
+ fprintf ( fp, "\nBackup VERTEX %d K %d\n", num_kmer, min );
+ fprintf ( fp, "\nBackup EDGEs %d\n", numed );
+ fprintf ( fp, "\nBackup MaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
+ }
+ else
+ {
+ fprintf ( fp, "VERTEX %d K %d\n", num_kmer, min );
+ fprintf ( fp, "\nEDGEs %d\n", numed );
+ fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxreadlen, minreadlen, maxnamelen );
+ }
+
+ fclose ( fp );
}
#ifdef MER127
// kmer1 = kmer1 | kmer2
inline Kmer KmerOr ( Kmer kmer1, Kmer kmer2 )
{
- kmer1.high1 |= kmer2.high1;
- kmer1.low1 |= kmer2.low1;
- kmer1.high2 |= kmer2.high2;
- kmer1.low2 |= kmer2.low2;
- return kmer1;
+ kmer1.high1 |= kmer2.high1;
+ kmer1.low1 |= kmer2.low1;
+ kmer1.high2 |= kmer2.high2;
+ kmer1.low2 |= kmer2.low2;
+ return kmer1;
}
//Add ch at the head of prev.
inline Kmer KmerPlusHead ( Kmer prev, char ch, int len )
{
- Kmer word;
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
-
- if ( 2 * len < 64 )
- {
- word.low2 = ch & 0x3;
- word.low2 <<= ( 2 * len );
- }
- else if ( 2 * len < 128 )
- {
- word.high2 = ch & 0x3;
- word.high2 <<= ( 2 * len - 64 );
- }
- else if ( 2 * len < 192 )
- {
- word.low1 = ch & 0x3;
- word.low1 <<= ( 2 * len - 128 );
- }
- else
- {
- word.high1 = ch & 0x3;
- word.high1 <<= ( 2 * len - 192 );
- }
-
- word = KmerOr ( word, prev );
- return word;
+ Kmer word;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
+
+ if ( 2 * len < 64 )
+ {
+ word.low2 = ch & 0x3;
+ word.low2 <<= ( 2 * len );
+ }
+ else if ( 2 * len < 128 )
+ {
+ word.high2 = ch & 0x3;
+ word.high2 <<= ( 2 * len - 64 );
+ }
+ else if ( 2 * len < 192 )
+ {
+ word.low1 = ch & 0x3;
+ word.low1 <<= ( 2 * len - 128 );
+ }
+ else
+ {
+ word.high1 = ch & 0x3;
+ word.high1 <<= ( 2 * len - 192 );
+ }
+
+ word = KmerOr ( word, prev );
+ return word;
}
//Add ch at the tail of prev.
inline Kmer KmerPlusTail ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word.low2 += ch & 0x3;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word.low2 += ch & 0x3;
+ return word;
}
static const Kmer kmerZero = { 0, 0, 0, 0 };
@@ -168,39 +168,39 @@ static const Kmer kmerZero = { 0, 0, 0, 0 };
//kmer1 = kmer1 | kmer2
Kmer KmerOr ( Kmer kmer1, Kmer kmer2 )
{
- kmer1.high |= kmer2.high;
- kmer1.low |= kmer2.low;
- return kmer1;
+ kmer1.high |= kmer2.high;
+ kmer1.low |= kmer2.low;
+ return kmer1;
}
//Add ch at the head of prev.
Kmer KmerPlusHead ( Kmer prev, char ch, int len )
{
- Kmer word;
- word.high = word.low = 0;
-
- if ( 2 * len < 64 )
- {
- word.low = ch & 0x3;
- word.low <<= ( 2 * len );
- }
- else
- {
- word.high = ch & 0x3;
- word.high <<= ( 2 * len - 64 );
- }
-
- word = KmerOr ( word, prev );
- return word;
+ Kmer word;
+ word.high = word.low = 0;
+
+ if ( 2 * len < 64 )
+ {
+ word.low = ch & 0x3;
+ word.low <<= ( 2 * len );
+ }
+ else
+ {
+ word.high = ch & 0x3;
+ word.high <<= ( 2 * len - 64 );
+ }
+
+ word = KmerOr ( word, prev );
+ return word;
}
//Add ch at the tail of prev.
inline Kmer KmerPlusTail ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word.low += ch & 0x3;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word.low += ch & 0x3;
+ return word;
}
static const Kmer kmerZero = { 0, 0 };
@@ -220,18 +220,18 @@ Return:
*************************************************/
Kmer getFromKmer ( unsigned int index )
{
- Kmer temp = kmerZero;
- temp = vt_array[edge_array[index].from_vt].kmer;
- int i = 0;
- char c;
-
- for ( i = 0; i < step; ++i )
- {
- c = getCharInTightString ( edge_array[index].seq, i );
- temp = KmerPlusTail ( temp, c );
- }
-
- return temp;
+ Kmer temp = kmerZero;
+ temp = vt_array[edge_array[index].from_vt].kmer;
+ int i = 0;
+ char c;
+
+ for ( i = 0; i < step; ++i )
+ {
+ c = getCharInTightString ( edge_array[index].seq, i );
+ temp = KmerPlusTail ( temp, c );
+ }
+
+ return temp;
}
/*************************************************
@@ -248,52 +248,52 @@ Return:
*************************************************/
Kmer getToKmer ( unsigned int index )
{
- Kmer temp = kmerZero;
- Kmer temp2 = kmerZero;
- int len = edge_array[index].length - overlaplen + step;
- char c;
- temp = vt_array[edge_array[index].to_vt].kmer;
- temp2 = vt_array[edge_array[index].from_vt].kmer;
- int i = 0;
-
- if ( len >= step )
- {
- for ( i = 0; i < step; ++i )
- {
- c = getCharInTightString ( edge_array[index].seq, len - i - 1 );
- temp = KmerPlusHead ( temp, c, i + overlaplen - step );
- }
- }
- else if ( len > 0 && len < step )
- {
- for ( i = 0; i < len; ++i )
- {
- c = getCharInTightString ( edge_array[index].seq, len - i - 1 );
- temp = KmerPlusHead ( temp, c, i + overlaplen - step );
- }
-
- for ( i = 0; i < ( step - len ); ++i )
- {
- c = lastCharInKmer ( KmerRightBitMove ( temp2, i << 1 ) ); //.low2 & 0x3;
- temp = KmerPlusHead ( temp, c, i + overlaplen - step + len );
- }
- }
- else
- {
- for ( i = 0; i < step; ++i )
- {
- c = lastCharInKmer ( KmerRightBitMove ( temp2, ( i - len ) << 1 ) ); //.low2 & 0x3;
- temp = KmerPlusHead ( temp, c, i + overlaplen - step );
- }
- }
-
- return temp;
+ Kmer temp = kmerZero;
+ Kmer temp2 = kmerZero;
+ int len = edge_array[index].length - overlaplen + step;
+ char c;
+ temp = vt_array[edge_array[index].to_vt].kmer;
+ temp2 = vt_array[edge_array[index].from_vt].kmer;
+ int i = 0;
+
+ if ( len >= step )
+ {
+ for ( i = 0; i < step; ++i )
+ {
+ c = getCharInTightString ( edge_array[index].seq, len - i - 1 );
+ temp = KmerPlusHead ( temp, c, i + overlaplen - step );
+ }
+ }
+ else if ( len > 0 && len < step )
+ {
+ for ( i = 0; i < len; ++i )
+ {
+ c = getCharInTightString ( edge_array[index].seq, len - i - 1 );
+ temp = KmerPlusHead ( temp, c, i + overlaplen - step );
+ }
+
+ for ( i = 0; i < ( step - len ); ++i )
+ {
+ c = lastCharInKmer ( KmerRightBitMove ( temp2, i << 1 ) ); //.low2 & 0x3;
+ temp = KmerPlusHead ( temp, c, i + overlaplen - step + len );
+ }
+ }
+ else
+ {
+ for ( i = 0; i < step; ++i )
+ {
+ c = lastCharInKmer ( KmerRightBitMove ( temp2, ( i - len ) << 1 ) ); //.low2 & 0x3;
+ temp = KmerPlusHead ( temp, c, i + overlaplen - step );
+ }
+ }
+
+ return temp;
}
//Only mark on edge.
inline void delete1Edge ( unsigned int index )
{
- edge_array[index].deleted = 1;
+ edge_array[index].deleted = 1;
}
/*************************************************
@@ -311,72 +311,84 @@ Return:
*************************************************/
int kmer2vtnew ( unsigned int index, int from )
{
- Kmer kmer;
- Kmer bal_word;
- int vt_id;
- int found = 0;
- kmer_t2 * node = NULL;
-
- if ( from )
- { kmer = getFromKmer ( index ); }
- else
- { kmer = getToKmer ( index ); }
-
- bal_word = reverseComplement ( kmer, overlaplen );
-
- if ( KmerSmaller ( kmer, bal_word ) )
- {
- vt_id = bisearch ( &vt_arraynew[0], num_vtnew, kmer );
-
- if ( vt_id < 0 )
- {
- ++notfoundcount;
- fprintf ( stderr, "Updating edge, small vertex " );
- printKmerSeq ( stderr, kmer );
- fprintf ( stderr, " is not found, it's twin is " );
- printKmerSeq ( stderr, reverseComplement ( kmer, overlaplen ) );
- fprintf ( stderr, " .\n" );
- found = search_kmerset2 ( KmerSetsNew, kmer, &node );
-
- if ( found )
- { fprintf ( stderr, "The kmer is in kmer set but not in vt_array.\n" ); }
- else
- { fprintf ( stderr, "The kmer is not in kmer set and vt_array.\n" ); }
- }
- else
- {
- ++foundcount;
- }
-
- return vt_id;
- }
- else
- {
- vt_id = bisearch ( &vt_arraynew[0], num_vtnew, bal_word );
-
- if ( vt_id >= 0 )
- {
- vt_id += num_vtnew;
- ++foundcount;
- }
- else
- {
- ++notfoundcount;
- fprintf ( stderr, "Updating edge, big vertex " );
- printKmerSeq ( stderr, reverseComplement ( bal_word, overlaplen ) );
- fprintf ( stderr, " is not found, it's twin is " );
- printKmerSeq ( stderr, bal_word );
- fprintf ( stderr, " .\n" );
- found = search_kmerset2 ( KmerSetsNew, bal_word, &node );
-
- if ( found )
- { fprintf ( stderr, "The kmer is in kmer set but not in vt_array.\n" ); }
- else
- { fprintf ( stderr, "The kmer is not in kmer set and vt_array.\n" ); }
- }
-
- return vt_id;
- }
+ Kmer kmer;
+ Kmer bal_word;
+ int vt_id;
+ int found = 0;
+ kmer_t2 *node = NULL;
+
+ if ( from )
+ {
+ kmer = getFromKmer ( index );
+ }
+ else
+ {
+ kmer = getToKmer ( index );
+ }
+
+ bal_word = reverseComplement ( kmer, overlaplen );
+
+ if ( KmerSmaller ( kmer, bal_word ) )
+ {
+ vt_id = bisearch ( &vt_arraynew[0], num_vtnew, kmer );
+
+ if ( vt_id < 0 )
+ {
+ ++notfoundcount;
+ fprintf ( stderr, "Updating edge, small vertex " );
+ printKmerSeq ( stderr, kmer );
+ fprintf ( stderr, " is not found, it's twin is " );
+ printKmerSeq ( stderr, reverseComplement ( kmer, overlaplen ) );
+ fprintf ( stderr, " .\n" );
+ found = search_kmerset2 ( KmerSetsNew, kmer, &node );
+
+ if ( found )
+ {
+ fprintf ( stderr, "The kmer is in kmer set but not in vt_array.\n" );
+ }
+ else
+ {
+ fprintf ( stderr, "The kmer is not in kmer set and vt_array.\n" );
+ }
+ }
+ else
+ {
+ ++foundcount;
+ }
+
+ return vt_id;
+ }
+ else
+ {
+ vt_id = bisearch ( &vt_arraynew[0], num_vtnew, bal_word );
+
+ if ( vt_id >= 0 )
+ {
+ vt_id += num_vtnew;
+ ++foundcount;
+ }
+ else
+ {
+ ++notfoundcount;
+ fprintf ( stderr, "Updating edge, big vertex " );
+ printKmerSeq ( stderr, reverseComplement ( bal_word, overlaplen ) );
+ fprintf ( stderr, " is not found, it's twin is " );
+ printKmerSeq ( stderr, bal_word );
+ fprintf ( stderr, " .\n" );
+ found = search_kmerset2 ( KmerSetsNew, bal_word, &node );
+
+ if ( found )
+ {
+ fprintf ( stderr, "The kmer is in kmer set but not in vt_array.\n" );
+ }
+ else
+ {
+ fprintf ( stderr, "The kmer is not in kmer set and vt_array.\n" );
+ }
+ }
+
+ return vt_id;
+ }
}
/*************************************************
@@ -393,54 +405,58 @@ Return:
*************************************************/
void update1Edge ( unsigned int index )
{
- int templength = edge_array[index].length;
- int i = 0;
- int temp_from_vt;
- int temp_to_vt;
- char * tightSeq = NULL;
- temp_from_vt = kmer2vtnew ( index, 1 );
- temp_to_vt = kmer2vtnew ( index, 0 );
-
- if ( temp_from_vt < 0 || temp_to_vt < 0 )
- {
- destroyEdge2 ( index );
- delete1Edge ( index );
- fprintf ( stderr, "Warning : Kmer is not found, from_vt %d, to_vt %d.\n", temp_from_vt, temp_to_vt );
- return;
- }
-
- edge_array[index].from_vt = temp_from_vt;
- edge_array[index].to_vt = temp_to_vt;
- edge_array[index].length -= step;
-
- if ( edge_array[index].length == 1 || edge_array[index].length == 0 )
- { edge_array[index].cvg = 0; }
-
- tightSeq = ( char * ) ckalloc ( ( edge_array[index].length / 4 + 1 ) * sizeof ( char ) );
-
- for ( i = 0; i < edge_array[index].length; ++i )
- { writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i + step ), tightSeq, i ); }
-
- if ( edge_array[index].seq )
- {
- free ( ( void * ) edge_array[index].seq );
- edge_array[index].seq = NULL;
- }
-
- edge_array[index].seq = tightSeq;
- edge_array[index].rv = NULL;
- ARC * currArc = edge_array[index].arcs;
- ARC * tempArc = NULL;
-
- while ( currArc )
- {
- tempArc = currArc;
- currArc = currArc->next;
- edge_array[index].arcs = deleteArc ( edge_array[index].arcs, tempArc );
- }
-
- edge_array[index].arcs = NULL;
- edge_array[index].markers = NULL;
+ int templength = edge_array[index].length;
+ int i = 0;
+ int temp_from_vt;
+ int temp_to_vt;
+ char *tightSeq = NULL;
+ temp_from_vt = kmer2vtnew ( index, 1 );
+ temp_to_vt = kmer2vtnew ( index, 0 );
+
+ if ( temp_from_vt < 0 || temp_to_vt < 0 )
+ {
+ destroyEdge2 ( index );
+ delete1Edge ( index );
+ fprintf ( stderr, "Warning : Kmer is not found, from_vt %d, to_vt %d.\n", temp_from_vt, temp_to_vt );
+ return;
+ }
+
+ edge_array[index].from_vt = temp_from_vt;
+ edge_array[index].to_vt = temp_to_vt;
+ edge_array[index].length -= step;
+
+ if ( edge_array[index].length == 1 || edge_array[index].length == 0 )
+ {
+ edge_array[index].cvg = 0;
+ }
+
+ tightSeq = ( char * ) ckalloc ( ( edge_array[index].length / 4 + 1 ) * sizeof ( char ) );
+
+ for ( i = 0; i < edge_array[index].length; ++i )
+ {
+ writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i + step ), tightSeq, i );
+ }
+
+ if ( edge_array[index].seq )
+ {
+ free ( ( void * ) edge_array[index].seq );
+ edge_array[index].seq = NULL;
+ }
+
+ edge_array[index].seq = tightSeq;
+ edge_array[index].rv = NULL;
+ ARC *currArc = edge_array[index].arcs;
+ ARC *tempArc = NULL;
+
+ while ( currArc )
+ {
+ tempArc = currArc;
+ currArc = currArc->next;
+ edge_array[index].arcs = deleteArc ( edge_array[index].arcs, tempArc );
+ }
+
+ edge_array[index].arcs = NULL;
+ edge_array[index].markers = NULL;
}
/*************************************************
@@ -457,168 +473,170 @@ Return:
*************************************************/
void getNewHash()
{
- unsigned int i;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- kmer_t2 * node;
- unsigned int deletecount = 0;
- kmer_cnew = 0;
- kmerBuffer = ( Kmer * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( ubyte8 ) );
- edge_id = ( unsigned int * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( unsigned int ) );
- //00 : big to 01 : big from,10 : small to, 11 : small from
- flagBuffer = ( boolean * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( boolean ) );
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- { continue; }
-
- if ( edge_array[i].length < step ) //=
- {
- destroyEdge2 ( i );
- delete1Edge ( i );
- deletecount++;
- continue;
- }
-
- word = kmerZero;
- bal_word = kmerZero;
-
- if ( edge_array[i].length == step )
- {
- word = getFromKmer ( i );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[kmer_cnew] = hash_ban;
- kmerBuffer[kmer_cnew] = word;
- flagBuffer[kmer_cnew] = 4;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[kmer_cnew] = bal_hash_ban;
- kmerBuffer[kmer_cnew] = bal_word;
- flagBuffer[kmer_cnew] = 5;
- }
-
- edge_id[kmer_cnew] = i;
- ++kmer_cnew;
- continue;
- }
-
- if ( edge_array[i].bal_edge == 1 )
- {
- word = getFromKmer ( i );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[kmer_cnew] = hash_ban;
- kmerBuffer[kmer_cnew] = word;
- flagBuffer[kmer_cnew] = 6;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[kmer_cnew] = bal_hash_ban;
- kmerBuffer[kmer_cnew] = bal_word;
- flagBuffer[kmer_cnew] = 7;
- }
-
- edge_id[kmer_cnew] = i;
- ++kmer_cnew;
- word = kmerZero;
- bal_word = kmerZero;
- word = getToKmer ( i );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[kmer_cnew] = hash_ban;
- kmerBuffer[kmer_cnew] = word;
- flagBuffer[kmer_cnew] = 8;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[kmer_cnew] = bal_hash_ban;
- kmerBuffer[kmer_cnew] = bal_word;
- flagBuffer[kmer_cnew] = 9;
- }
-
- edge_id[kmer_cnew] = i;
- ++kmer_cnew;
- continue;
- }
-
- word = getFromKmer ( i );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[kmer_cnew] = hash_ban;
- kmerBuffer[kmer_cnew] = word;
- flagBuffer[kmer_cnew] = 3;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[kmer_cnew] = bal_hash_ban;
- kmerBuffer[kmer_cnew] = bal_word;
- flagBuffer[kmer_cnew] = 1;
- }
-
- edge_id[kmer_cnew] = i;
- ++kmer_cnew;
- word = kmerZero;
- bal_word = kmerZero;
- word = getToKmer ( i );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[kmer_cnew] = hash_ban;
- kmerBuffer[kmer_cnew] = word;
- flagBuffer[kmer_cnew] = 2;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[kmer_cnew] = bal_hash_ban;
- kmerBuffer[kmer_cnew] = bal_word;
- flagBuffer[kmer_cnew] = 0;
- }
-
- edge_id[kmer_cnew] = i;
- ++kmer_cnew;
- }
-
- fprintf ( stderr, "%lld edge(s) deleted in length of 0.\n", deletecount );
- KmerSetsNew = init_kmerset2 ( 1024, 0.77f );
-
- for ( i = 0; i < kmer_cnew; ++i )
- {
- put_kmerset2 ( KmerSetsNew, kmerBuffer[i], edge_id[i], flagBuffer[i], &node );
- }
-
- num_vtnew = count_kmerset2 ( KmerSetsNew );
- fprintf ( stderr, "%u new kmer(s).\n", num_vtnew );
- free ( kmerBuffer );
- kmerBuffer = NULL;
- free ( hashBanBuffer );
- hashBanBuffer = NULL;
- free ( edge_id );
- edge_id = NULL;
- free ( flagBuffer );
- flagBuffer = NULL;
+ unsigned int i;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ kmer_t2 *node;
+ unsigned int deletecount = 0;
+ kmer_cnew = 0;
+ kmerBuffer = ( Kmer * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( ubyte8 ) );
+ edge_id = ( unsigned int * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( unsigned int ) );
+ //00 : big to 01 : big from,10 : small to, 11 : small from
+ flagBuffer = ( boolean * ) ckalloc ( 2 * ( num_ed + 1 ) * sizeof ( boolean ) );
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length < step ) //=
+ {
+ destroyEdge2 ( i );
+ delete1Edge ( i );
+ deletecount++;
+ continue;
+ }
+
+ word = kmerZero;
+ bal_word = kmerZero;
+
+ if ( edge_array[i].length == step )
+ {
+ word = getFromKmer ( i );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[kmer_cnew] = hash_ban;
+ kmerBuffer[kmer_cnew] = word;
+ flagBuffer[kmer_cnew] = 4;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[kmer_cnew] = bal_hash_ban;
+ kmerBuffer[kmer_cnew] = bal_word;
+ flagBuffer[kmer_cnew] = 5;
+ }
+
+ edge_id[kmer_cnew] = i;
+ ++kmer_cnew;
+ continue;
+ }
+
+ if ( edge_array[i].bal_edge == 1 )
+ {
+ word = getFromKmer ( i );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[kmer_cnew] = hash_ban;
+ kmerBuffer[kmer_cnew] = word;
+ flagBuffer[kmer_cnew] = 6;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[kmer_cnew] = bal_hash_ban;
+ kmerBuffer[kmer_cnew] = bal_word;
+ flagBuffer[kmer_cnew] = 7;
+ }
+
+ edge_id[kmer_cnew] = i;
+ ++kmer_cnew;
+ word = kmerZero;
+ bal_word = kmerZero;
+ word = getToKmer ( i );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[kmer_cnew] = hash_ban;
+ kmerBuffer[kmer_cnew] = word;
+ flagBuffer[kmer_cnew] = 8;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[kmer_cnew] = bal_hash_ban;
+ kmerBuffer[kmer_cnew] = bal_word;
+ flagBuffer[kmer_cnew] = 9;
+ }
+
+ edge_id[kmer_cnew] = i;
+ ++kmer_cnew;
+ continue;
+ }
+
+ word = getFromKmer ( i );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[kmer_cnew] = hash_ban;
+ kmerBuffer[kmer_cnew] = word;
+ flagBuffer[kmer_cnew] = 3;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[kmer_cnew] = bal_hash_ban;
+ kmerBuffer[kmer_cnew] = bal_word;
+ flagBuffer[kmer_cnew] = 1;
+ }
+
+ edge_id[kmer_cnew] = i;
+ ++kmer_cnew;
+ word = kmerZero;
+ bal_word = kmerZero;
+ word = getToKmer ( i );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[kmer_cnew] = hash_ban;
+ kmerBuffer[kmer_cnew] = word;
+ flagBuffer[kmer_cnew] = 2;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[kmer_cnew] = bal_hash_ban;
+ kmerBuffer[kmer_cnew] = bal_word;
+ flagBuffer[kmer_cnew] = 0;
+ }
+
+ edge_id[kmer_cnew] = i;
+ ++kmer_cnew;
+ }
+
+ fprintf ( stderr, "%lld edge(s) deleted in length of 0.\n", deletecount );
+ KmerSetsNew = init_kmerset2 ( 1024, 0.77f );
+
+ for ( i = 0; i < kmer_cnew; ++i )
+ {
+ put_kmerset2 ( KmerSetsNew, kmerBuffer[i], edge_id[i], flagBuffer[i], &node );
+ }
+
+ num_vtnew = count_kmerset2 ( KmerSetsNew );
+ fprintf ( stderr, "%u new kmer(s).\n", num_vtnew );
+ free ( kmerBuffer );
+ kmerBuffer = NULL;
+ free ( hashBanBuffer );
+ hashBanBuffer = NULL;
+ free ( edge_id );
+ edge_id = NULL;
+ free ( flagBuffer );
+ flagBuffer = NULL;
}
/*************************************************
@@ -635,47 +653,51 @@ Return:
*************************************************/
void getNewVertex()
{
- unsigned int i;
- Kmer word, bal_word;
- vt_arraynew = ( VERTEX * ) ckalloc ( 4 * ( num_vtnew + 1 ) * sizeof ( VERTEX ) );
- num_kmer_limit = 4 * num_vtnew;
- KmerSet2 * set;
- kmer_t2 * node;
- unsigned int count = 0;
- set = KmerSetsNew;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- node = set->array + set->iter_ptr;
-
- if ( node )
- {
- word = node->seq;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- { vt_arraynew[count].kmer = word; }
- else
- { vt_arraynew[count].kmer = bal_word; }
-
- ++count;
- }
- }
-
- set->iter_ptr ++;
- }
-
- num_vtnew = count;
- qsort ( &vt_arraynew[0], num_vtnew, sizeof ( vt_arraynew[0] ), cmp_vertex );
-
- for ( i = 0; i < num_vtnew; ++i )
- {
- bal_word = reverseComplement ( vt_arraynew[i].kmer, overlaplen );
- vt_arraynew[i + num_vtnew].kmer = bal_word;
- }
+ unsigned int i;
+ Kmer word, bal_word;
+ vt_arraynew = ( VERTEX * ) ckalloc ( 4 * ( num_vtnew + 1 ) * sizeof ( VERTEX ) );
+ num_kmer_limit = 4 * num_vtnew;
+ KmerSet2 *set;
+ kmer_t2 *node;
+ unsigned int count = 0;
+ set = KmerSetsNew;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ node = set->array + set->iter_ptr;
+
+ if ( node )
+ {
+ word = node->seq;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ vt_arraynew[count].kmer = word;
+ }
+ else
+ {
+ vt_arraynew[count].kmer = bal_word;
+ }
+
+ ++count;
+ }
+ }
+
+ set->iter_ptr ++;
+ }
+
+ num_vtnew = count;
+ qsort ( &vt_arraynew[0], num_vtnew, sizeof ( vt_arraynew[0] ), cmp_vertex );
+
+ for ( i = 0; i < num_vtnew; ++i )
+ {
+ bal_word = reverseComplement ( vt_arraynew[i].kmer, overlaplen );
+ vt_arraynew[i + num_vtnew].kmer = bal_word;
+ }
}
/*************************************************
@@ -695,48 +717,50 @@ Return:
*************************************************/
void buildGraphHash()
{
- unsigned int i;
- unsigned int count = 0;
- //use from kmer & to kmer to build hash
- fprintf ( stderr, "Construct new kmer hash.\n" );
- getNewHash();
- getNewVertex();
- foundcount = 0;
- notfoundcount = 0;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- { continue; }
-
- if ( edge_array[i].length < step ) //=
- {
- destroyEdge2 ( i );
- delete1Edge ( i );
- continue;
- }
-
- //update twin edge together
- update1Edge ( i );
- ++count;
- }
-
- if ( notfoundcount )
- {
- fprintf ( stderr, "There are %lld kmer(s) found.\n", foundcount );
- fprintf ( stderr, "There are %lld kmer(s) not found.\n", notfoundcount );
- }
-
- fprintf ( stderr, "%u edge(s) updated to %dmer edge.\n", count, overlaplen );
-
- if ( vt_array )
- {
- free ( ( void * ) vt_array );
- vt_array = NULL;
- }
-
- vt_array = vt_arraynew;
- num_vt = num_vtnew;
+ unsigned int i;
+ unsigned int count = 0;
+ //use from kmer & to kmer to build hash
+ fprintf ( stderr, "Construct new kmer hash.\n" );
+ getNewHash();
+ getNewVertex();
+ foundcount = 0;
+ notfoundcount = 0;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length < step ) //=
+ {
+ destroyEdge2 ( i );
+ delete1Edge ( i );
+ continue;
+ }
+
+ //update twin edge together
+ update1Edge ( i );
+ ++count;
+ }
+
+ if ( notfoundcount )
+ {
+ fprintf ( stderr, "There are %lld kmer(s) found.\n", foundcount );
+ fprintf ( stderr, "There are %lld kmer(s) not found.\n", notfoundcount );
+ }
+
+ fprintf ( stderr, "%u edge(s) updated to %dmer edge.\n", count, overlaplen );
+
+ if ( vt_array )
+ {
+ free ( ( void * ) vt_array );
+ vt_array = NULL;
+ }
+
+ vt_array = vt_arraynew;
+ num_vt = num_vtnew;
}
//FILE *tempF = NULL;
@@ -759,40 +783,42 @@ Output:
Return:
None.
*************************************************/
-void addArc ( char * libfile, char * graph, int flag, int last, int maxk ) //, boolean keepReadFile
+void addArc ( char *libfile, char *graph, int flag, int last, int maxk ) //, boolean keepReadFile
{
- /*
- if(!flag)
- {
- char readSeqName[256];
- sprintf(readSeqName,"%s.read",graph);
- tempF=fopen(readSeqName,"r");
- }
- else
- {
- tempF = NULL;
- }
- */
- if ( flag ) // || tempF)
- {
- // if(tempF)
- // fclose(tempF);
- Read2edge2 ( libfile, graph, last, maxk ); //, keepReadFile
- }
- else
- { Read2edge ( libfile, graph, maxk ); }
-
- unsigned int i;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- {
- destroyEdge2 ( i );
- }
- }
-
- removeDeadArcs2();
+ /*
+ if(!flag)
+ {
+ char readSeqName[256];
+ sprintf(readSeqName,"%s.read",graph);
+ tempF=fopen(readSeqName,"r");
+ }
+ else
+ {
+ tempF = NULL;
+ }
+ */
+ if ( flag ) // || tempF)
+ {
+ // if(tempF)
+ // fclose(tempF);
+ Read2edge2 ( libfile, graph, last, maxk ); //, keepReadFile
+ }
+ else
+ {
+ Read2edge ( libfile, graph, maxk );
+ }
+
+ unsigned int i;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ destroyEdge2 ( i );
+ }
+ }
+
+ removeDeadArcs2();
}
/*************************************************
@@ -810,329 +836,349 @@ Output:
Return:
Index of new extended kmer.
*************************************************/
-int kmer2edge ( int from, unsigned int index, char ch, char * backup )
+int kmer2edge ( int from, unsigned int index, char ch, char *backup )
{
- //add 1 kmer to edge
- Kmer kmer = kmerZero, bal_word = kmerZero;
- int vt_id = 0;
- char * tightSeq = NULL;
-
- // if(edge_array[index].arcs && edge_array[edge_array[index].arcs->to_ed].length <= 1)
- // return;
-
- if ( from <= 2 )
- {
- kmer = vt_array[edge_array[index].from_vt].kmer;
- *backup = lastCharInKmer ( kmer ); //.low2 & 3;
- kmer = prevKmer ( kmer, ch );
- }
- else
- {
- kmer = vt_array[edge_array[index].to_vt].kmer;
- *backup = ch;
- kmer = nextKmer ( kmer, ch );
- }
-
- bal_word = reverseComplement ( kmer, overlaplen );
-
- if ( KmerSmaller ( kmer, bal_word ) )
- {
- vt_id = bisearch ( &vt_array[0], num_vt, kmer );
-
- if ( vt_id < 0 )
- {
- ++newnotfoundcount;
- fprintf ( stderr, "When extending edge 'small vertex' is not found, edge %d kmer ", index );
- printKmerSeq ( stderr, kmer );
- fprintf ( stderr, " , it's twin " );
- printKmerSeq ( stderr, bal_word );
- fprintf ( stderr, " .\n" );
- }
- else
- {
- ++newfoundcount;
- }
- }
- else
- {
- vt_id = bisearch ( &vt_array[0], num_vt, bal_word );
-
- if ( vt_id >= 0 )
- {
- vt_id += num_vt;
- ++newfoundcount;
- }
- else
- {
- ++newnotfoundcount;
- fprintf ( stderr, "When extending edge 'big vertex' is not found, edge %d kmer ", index );
- printKmerSeq ( stderr, kmer );
- fprintf ( stderr, " , it's twin " );
- printKmerSeq ( stderr, bal_word );
- fprintf ( stderr, " .\n" );
- }
- }
-
- if ( vt_id < 0 )
- {
- return vt_id;
- }
-
- char backup1 = 0;
- char ch1 = int_comp ( ch );
- Kmer kmer1 = kmerZero, bal_word1 = kmerZero;
- int vt_id1 = 0;
- char * tightSeq1 = NULL;
-
- if ( from <= 2 )
- {
- kmer1 = vt_array[edge_array[index + 1].to_vt].kmer;
- backup1 = ch1;
- kmer1 = nextKmer ( kmer1, ch1 );
- }
- else
- {
- kmer1 = vt_array[edge_array[index + 1].from_vt].kmer;
- backup1 = lastCharInKmer ( kmer1 ); //.low2 & 3;
- kmer1 = prevKmer ( kmer1, ch1 );
- }
-
- bal_word1 = reverseComplement ( kmer1, overlaplen );
-
- if ( KmerSmaller ( kmer1, bal_word1 ) )
- {
- vt_id1 = bisearch ( &vt_array[0], num_vt, kmer1 );
-
- if ( vt_id1 < 0 )
- {
- ++newnotfoundcount;
- fprintf ( stderr, "When extending edge 'small vertex' is not found, edge %d kmer ", index + 1 );
- printKmerSeq ( stderr, kmer1 );
- fprintf ( stderr, " , it's twin " );
- printKmerSeq ( stderr, bal_word1 );
- fprintf ( stderr, " .\n" );
- }
- else
- {
- ++newfoundcount;
- }
- }
- else
- {
- vt_id1 = bisearch ( &vt_array[0], num_vt, bal_word1 );
-
- if ( vt_id1 >= 0 )
- {
- vt_id1 += num_vt;
- ++newfoundcount;
- }
- else
- {
- ++newnotfoundcount;
- fprintf ( stderr, "When extending edge big vertex is not found, edge %d kmer ", index + 1 );
- printKmerSeq ( stderr, kmer1 );
- fprintf ( stderr, " , it's twin " );
- printKmerSeq ( stderr, bal_word1 );
- fprintf ( stderr, " .\n" );
- }
- }
-
- if ( vt_id1 < 0 )
- {
- return vt_id1;
- }
-
- int i = 0;
-
- if ( from <= 2 )
- {
- //small
- edge_array[index].from_vt = vt_id;
- tightSeq = ( char * ) ckalloc ( ( ( edge_array[index].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
- writeChar2tightString ( *backup, tightSeq, 0 );
-
- for ( i = 0; i < edge_array[index].length; ++i )
- { writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i ), tightSeq, i + 1 ); }
-
- if ( edge_array[index].seq )
- {
- free ( ( void * ) edge_array[index].seq );
- edge_array[index].seq = NULL;
- }
-
- edge_array[index].seq = tightSeq;
- edge_array[index].length += 1;
- //big
- edge_array[index + 1].to_vt = vt_id1;
- tightSeq1 = ( char * ) ckalloc ( ( ( edge_array[index + 1].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
-
- for ( i = 0; i < edge_array[index + 1].length; ++i )
- { writeChar2tightString ( getCharInTightString ( edge_array[index + 1].seq, i ), tightSeq1, i ); }
-
- writeChar2tightString ( backup1, tightSeq1, i );
-
- if ( edge_array[index + 1].seq )
- {
- free ( ( void * ) edge_array[index + 1].seq );
- edge_array[index + 1].seq = NULL;
- }
-
- edge_array[index + 1].seq = tightSeq1;
- edge_array[index + 1].length += 1;
- }
- else
- {
- //small
- edge_array[index].to_vt = vt_id;
- tightSeq = ( char * ) ckalloc ( ( ( edge_array[index].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
-
- for ( i = 0; i < edge_array[index].length; ++i )
- { writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i ), tightSeq, i ); }
-
- writeChar2tightString ( *backup, tightSeq, i );
-
- if ( edge_array[index].seq )
- {
- free ( ( void * ) edge_array[index].seq );
- edge_array[index].seq = NULL;
- }
-
- edge_array[index].seq = tightSeq;
- edge_array[index].length += 1;
- //big
- edge_array[index + 1].from_vt = vt_id1;
- tightSeq1 = ( char * ) ckalloc ( ( ( edge_array[index + 1].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
- writeChar2tightString ( backup1, tightSeq1, 0 );
-
- for ( i = 0; i < edge_array[index + 1].length; ++i )
- { writeChar2tightString ( getCharInTightString ( edge_array[index + 1].seq, i ), tightSeq1, i + 1 ); }
-
- if ( edge_array[index + 1].seq )
- {
- free ( ( void * ) edge_array[index + 1].seq );
- edge_array[index + 1].seq = NULL;
- }
-
- edge_array[index + 1].seq = tightSeq1;
- edge_array[index + 1].length += 1;
- }
-
- return 0;
+ //add 1 kmer to edge
+ Kmer kmer = kmerZero, bal_word = kmerZero;
+ int vt_id = 0;
+ char *tightSeq = NULL;
+
+ // if(edge_array[index].arcs && edge_array[edge_array[index].arcs->to_ed].length <= 1)
+ // return;
+
+ if ( from <= 2 )
+ {
+ kmer = vt_array[edge_array[index].from_vt].kmer;
+ *backup = lastCharInKmer ( kmer ); //.low2 & 3;
+ kmer = prevKmer ( kmer, ch );
+ }
+ else
+ {
+ kmer = vt_array[edge_array[index].to_vt].kmer;
+ *backup = ch;
+ kmer = nextKmer ( kmer, ch );
+ }
+
+ bal_word = reverseComplement ( kmer, overlaplen );
+
+ if ( KmerSmaller ( kmer, bal_word ) )
+ {
+ vt_id = bisearch ( &vt_array[0], num_vt, kmer );
+
+ if ( vt_id < 0 )
+ {
+ ++newnotfoundcount;
+ fprintf ( stderr, "When extending edge 'small vertex' is not found, edge %d kmer ", index );
+ printKmerSeq ( stderr, kmer );
+ fprintf ( stderr, " , it's twin " );
+ printKmerSeq ( stderr, bal_word );
+ fprintf ( stderr, " .\n" );
+ }
+ else
+ {
+ ++newfoundcount;
+ }
+ }
+ else
+ {
+ vt_id = bisearch ( &vt_array[0], num_vt, bal_word );
+
+ if ( vt_id >= 0 )
+ {
+ vt_id += num_vt;
+ ++newfoundcount;
+ }
+ else
+ {
+ ++newnotfoundcount;
+ fprintf ( stderr, "When extending edge 'big vertex' is not found, edge %d kmer ", index );
+ printKmerSeq ( stderr, kmer );
+ fprintf ( stderr, " , it's twin " );
+ printKmerSeq ( stderr, bal_word );
+ fprintf ( stderr, " .\n" );
+ }
+ }
+
+ if ( vt_id < 0 )
+ {
+ return vt_id;
+ }
+
+ char backup1 = 0;
+ char ch1 = int_comp ( ch );
+ Kmer kmer1 = kmerZero, bal_word1 = kmerZero;
+ int vt_id1 = 0;
+ char *tightSeq1 = NULL;
+
+ if ( from <= 2 )
+ {
+ kmer1 = vt_array[edge_array[index + 1].to_vt].kmer;
+ backup1 = ch1;
+ kmer1 = nextKmer ( kmer1, ch1 );
+ }
+ else
+ {
+ kmer1 = vt_array[edge_array[index + 1].from_vt].kmer;
+ backup1 = lastCharInKmer ( kmer1 ); //.low2 & 3;
+ kmer1 = prevKmer ( kmer1, ch1 );
+ }
+
+ bal_word1 = reverseComplement ( kmer1, overlaplen );
+
+ if ( KmerSmaller ( kmer1, bal_word1 ) )
+ {
+ vt_id1 = bisearch ( &vt_array[0], num_vt, kmer1 );
+
+ if ( vt_id1 < 0 )
+ {
+ ++newnotfoundcount;
+ fprintf ( stderr, "When extending edge 'small vertex' is not found, edge %d kmer ", index + 1 );
+ printKmerSeq ( stderr, kmer1 );
+ fprintf ( stderr, " , it's twin " );
+ printKmerSeq ( stderr, bal_word1 );
+ fprintf ( stderr, " .\n" );
+ }
+ else
+ {
+ ++newfoundcount;
+ }
+ }
+ else
+ {
+ vt_id1 = bisearch ( &vt_array[0], num_vt, bal_word1 );
+
+ if ( vt_id1 >= 0 )
+ {
+ vt_id1 += num_vt;
+ ++newfoundcount;
+ }
+ else
+ {
+ ++newnotfoundcount;
+ fprintf ( stderr, "When extending edge big vertex is not found, edge %d kmer ", index + 1 );
+ printKmerSeq ( stderr, kmer1 );
+ fprintf ( stderr, " , it's twin " );
+ printKmerSeq ( stderr, bal_word1 );
+ fprintf ( stderr, " .\n" );
+ }
+ }
+
+ if ( vt_id1 < 0 )
+ {
+ return vt_id1;
+ }
+
+ int i = 0;
+
+ if ( from <= 2 )
+ {
+ //small
+ edge_array[index].from_vt = vt_id;
+ tightSeq = ( char * ) ckalloc ( ( ( edge_array[index].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
+ writeChar2tightString ( *backup, tightSeq, 0 );
+
+ for ( i = 0; i < edge_array[index].length; ++i )
+ {
+ writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i ), tightSeq, i + 1 );
+ }
+
+ if ( edge_array[index].seq )
+ {
+ free ( ( void * ) edge_array[index].seq );
+ edge_array[index].seq = NULL;
+ }
+
+ edge_array[index].seq = tightSeq;
+ edge_array[index].length += 1;
+ //big
+ edge_array[index + 1].to_vt = vt_id1;
+ tightSeq1 = ( char * ) ckalloc ( ( ( edge_array[index + 1].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
+
+ for ( i = 0; i < edge_array[index + 1].length; ++i )
+ {
+ writeChar2tightString ( getCharInTightString ( edge_array[index + 1].seq, i ), tightSeq1, i );
+ }
+
+ writeChar2tightString ( backup1, tightSeq1, i );
+
+ if ( edge_array[index + 1].seq )
+ {
+ free ( ( void * ) edge_array[index + 1].seq );
+ edge_array[index + 1].seq = NULL;
+ }
+
+ edge_array[index + 1].seq = tightSeq1;
+ edge_array[index + 1].length += 1;
+ }
+ else
+ {
+ //small
+ edge_array[index].to_vt = vt_id;
+ tightSeq = ( char * ) ckalloc ( ( ( edge_array[index].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
+
+ for ( i = 0; i < edge_array[index].length; ++i )
+ {
+ writeChar2tightString ( getCharInTightString ( edge_array[index].seq, i ), tightSeq, i );
+ }
+
+ writeChar2tightString ( *backup, tightSeq, i );
+
+ if ( edge_array[index].seq )
+ {
+ free ( ( void * ) edge_array[index].seq );
+ edge_array[index].seq = NULL;
+ }
+
+ edge_array[index].seq = tightSeq;
+ edge_array[index].length += 1;
+ //big
+ edge_array[index + 1].from_vt = vt_id1;
+ tightSeq1 = ( char * ) ckalloc ( ( ( edge_array[index + 1].length + 1 ) / 4 + 1 ) * sizeof ( char ) );
+ writeChar2tightString ( backup1, tightSeq1, 0 );
+
+ for ( i = 0; i < edge_array[index + 1].length; ++i )
+ {
+ writeChar2tightString ( getCharInTightString ( edge_array[index + 1].seq, i ), tightSeq1, i + 1 );
+ }
+
+ if ( edge_array[index + 1].seq )
+ {
+ free ( ( void * ) edge_array[index + 1].seq );
+ edge_array[index + 1].seq = NULL;
+ }
+
+ edge_array[index + 1].seq = tightSeq1;
+ edge_array[index + 1].length += 1;
+ }
+
+ return 0;
}
//Add edge and reverse complement.
void addEdge ( unsigned int from, unsigned int to, char ch, int bal_edge, unsigned int cvg )
{
- if ( num_ed_temp + 1 > num_ed_limit )
- {
- unsigned int new_num_ed = num_ed_limit * 1.2;
- edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 1 ) * sizeof ( EDGE ), ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
- num_ed_limit = new_num_ed;
- int j;
-
- for ( j = num_ed_temp + 1; j <= num_ed_limit; j++ )
- {
- edge_array[j].seq = NULL;
- }
-
- fprintf ( stderr, "Realloc edge array.\n" );
- }
-
- char * tightSeq = ( char * ) ckalloc ( sizeof ( char ) );
- writeChar2tightString ( ch, tightSeq, 0 );
- edge_array[num_ed_temp + 1].from_vt = from;
- edge_array[num_ed_temp + 1].to_vt = to;
- edge_array[num_ed_temp + 1].length = 1;
- edge_array[num_ed_temp + 1].cvg = cvg;
- edge_array[num_ed_temp + 1].bal_edge = bal_edge;
- edge_array[num_ed_temp + 1].multi = 0;
- edge_array[num_ed_temp + 1].deleted = 0;
- edge_array[num_ed_temp + 1].flag = 0;
-
- if ( edge_array[num_ed_temp + 1].seq )
- { free ( edge_array[num_ed_temp + 1].seq ); }
-
- edge_array[num_ed_temp + 1].seq = tightSeq;
- edge_array[num_ed_temp + 1].rv = NULL;
- edge_array[num_ed_temp + 1].arcs = NULL;
- edge_array[num_ed_temp + 1].markers = NULL;
- ++num_ed_temp;
+ if ( num_ed_temp + 1 > num_ed_limit )
+ {
+ unsigned int new_num_ed = num_ed_limit * 1.2;
+ edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 1 ) * sizeof ( EDGE ), ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+ num_ed_limit = new_num_ed;
+ int j;
+
+ for ( j = num_ed_temp + 1; j <= num_ed_limit; j++ )
+ {
+ edge_array[j].seq = NULL;
+ }
+
+ fprintf ( stderr, "Realloc edge array.\n" );
+ }
+
+ char *tightSeq = ( char * ) ckalloc ( sizeof ( char ) );
+ writeChar2tightString ( ch, tightSeq, 0 );
+ edge_array[num_ed_temp + 1].from_vt = from;
+ edge_array[num_ed_temp + 1].to_vt = to;
+ edge_array[num_ed_temp + 1].length = 1;
+ edge_array[num_ed_temp + 1].cvg = cvg;
+ edge_array[num_ed_temp + 1].bal_edge = bal_edge;
+ edge_array[num_ed_temp + 1].multi = 0;
+ edge_array[num_ed_temp + 1].deleted = 0;
+ edge_array[num_ed_temp + 1].flag = 0;
+
+ if ( edge_array[num_ed_temp + 1].seq )
+ {
+ free ( edge_array[num_ed_temp + 1].seq );
+ }
+
+ edge_array[num_ed_temp + 1].seq = tightSeq;
+ edge_array[num_ed_temp + 1].rv = NULL;
+ edge_array[num_ed_temp + 1].arcs = NULL;
+ edge_array[num_ed_temp + 1].markers = NULL;
+ ++num_ed_temp;
}
//Check whether kmers are equal to the front kmer and last kmer of the edge.
-boolean checkEqual ( unsigned int from, unsigned int to, char ch, unsigned int index, unsigned int * getIndex )
+boolean checkEqual ( unsigned int from, unsigned int to, char ch, unsigned int index, unsigned int *getIndex )
{
- if ( edge_array[index].length == 1 && ( ( edge_array[index].from_vt == from && edge_array[index].to_vt == to ) ) )
- {
- return true;
- }
+ if ( edge_array[index].length == 1 && ( ( edge_array[index].from_vt == from && edge_array[index].to_vt == to ) ) )
+ {
+ return true;
+ }
- return false;
+ return false;
}
//Whether edge exist in set.
-boolean EdgeExist ( unsigned int from, unsigned int to, char ch, kmer_t2 * node, unsigned int * index )
+boolean EdgeExist ( unsigned int from, unsigned int to, char ch, kmer_t2 *node, unsigned int *index )
{
- int i = 0;
- EDGEID * temp = node->edgeId;
+ int i = 0;
+ EDGEID *temp = node->edgeId;
- while ( temp )
- {
- *index = temp->edge;
+ while ( temp )
+ {
+ *index = temp->edge;
- if ( checkEqual ( from, to, ch, temp->edge, index ) )
- { return true; }
+ if ( checkEqual ( from, to, ch, temp->edge, index ) )
+ {
+ return true;
+ }
- temp = temp->next;
- }
+ temp = temp->next;
+ }
- return false;
+ return false;
}
//Update edgeId in node.
-void updateNode ( kmer_t2 * node, kmer_t2 * node1 )
+void updateNode ( kmer_t2 *node, kmer_t2 *node1 )
{
- struct edgeID * edgeid;
- edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
- edgeid->edge = num_ed_temp + 1;
- edgeid->flag = 0;
- edgeid->next = NULL;
-
- if ( node->edgeId )
- { edgeid->next = node->edgeId; }
-
- node->edgeId = edgeid;
- node->count++;
- edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
- edgeid->edge = num_ed_temp + 2;
- edgeid->flag = 1;
- edgeid->next = NULL;
-
- if ( node->edgeId )
- { edgeid->next = node->edgeId; }
-
- node->edgeId = edgeid;
- node->count++;
- edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
- edgeid->edge = num_ed_temp + 1;
- edgeid->flag = 2;
- edgeid->next = NULL;
-
- if ( node1->edgeId )
- { edgeid->next = node1->edgeId; }
-
- node1->edgeId = edgeid;
- node1->count++;
- edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
- edgeid->edge = num_ed_temp + 2;
- edgeid->flag = 3;
- edgeid->next = NULL;
-
- if ( node1->edgeId )
- { edgeid->next = node1->edgeId; }
-
- node1->edgeId = edgeid;
- node1->count++;
+ struct edgeID *edgeid;
+ edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
+ edgeid->edge = num_ed_temp + 1;
+ edgeid->flag = 0;
+ edgeid->next = NULL;
+
+ if ( node->edgeId )
+ {
+ edgeid->next = node->edgeId;
+ }
+
+ node->edgeId = edgeid;
+ node->count++;
+ edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
+ edgeid->edge = num_ed_temp + 2;
+ edgeid->flag = 1;
+ edgeid->next = NULL;
+
+ if ( node->edgeId )
+ {
+ edgeid->next = node->edgeId;
+ }
+
+ node->edgeId = edgeid;
+ node->count++;
+ edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
+ edgeid->edge = num_ed_temp + 1;
+ edgeid->flag = 2;
+ edgeid->next = NULL;
+
+ if ( node1->edgeId )
+ {
+ edgeid->next = node1->edgeId;
+ }
+
+ node1->edgeId = edgeid;
+ node1->count++;
+ edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
+ edgeid->edge = num_ed_temp + 2;
+ edgeid->flag = 3;
+ edgeid->next = NULL;
+
+ if ( node1->edgeId )
+ {
+ edgeid->next = node1->edgeId;
+ }
+
+ node1->edgeId = edgeid;
+ node1->count++;
}
/*************************************************
@@ -1142,185 +1188,191 @@ Description:
1. Check whether it can be solved or not.
2. Add edge when necessary.
Input:
- 1. from : whether it's 'from kmer'
- 2. from_ed : index of 'from edge'
- 3. to_ed : index of 'to edge'
- 4. node : node of 'last kmer' of 'from edge'
- 5. node1 : node of 'front kmer' of 'to edge'
- 6. maxk : max kmer
-Output:
- None.
-Return:
- None.
-*************************************************/
-void checkindegree ( int from, unsigned int from_ed, unsigned int to_ed, kmer_t2 * node, kmer_t2 * node1, int maxk )
-{
- int arcLeft_n = 0;
- int arcRight_n = 0;
- boolean exist = false;
- char ch = lastCharInKmer ( vt_array[edge_array[to_ed].from_vt].kmer ); //.low2 & 3;
- char backup = lastCharInKmer ( KmerRightBitMove ( vt_array[edge_array[from_ed].to_vt].kmer, ( overlaplen - 1 ) << 1 ) ); //.low2 & 3;
- unsigned int index;
- ARC * originalArc = NULL;
-
- if ( from <= 2 )
- {
- //out->in > 1
- arcCount2 ( from_ed, &arcRight_n );
-
- if ( arcRight_n > 1 )
- {
- exist = EdgeExist ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, node, &index );
-
- if ( !exist )
- {
- updateNode ( node, node1 );
- originalArc = getArcBetween ( from_ed, to_ed );
- edgeaddnumber += 2;
- addEdge ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, 2, originalArc->multiplicity * 10 );
- // if(overlaplen + step > maxk)
- {
- arcBuffer[0][arcBufferCount] = from_ed;
- arcBuffer[1][arcBufferCount] = num_ed_temp;
- arcBuffer[2][arcBufferCount++] = to_ed;
- }
- addEdge ( edge_array[getTwinEdge ( to_ed )].to_vt, edge_array[getTwinEdge ( from_ed )].from_vt, int_comp ( backup ), 0, originalArc->multiplicity * 10 );
- }
- else
- {
- // if(overlaplen + step > maxk)
- {
- arcBuffer[0][arcBufferCount] = from_ed;
- arcBuffer[1][arcBufferCount] = index;
- arcBuffer[2][arcBufferCount++] = to_ed;
- }
- }
- }
- }
- else
- {
- //out->in > 1
- arcCount2 ( getTwinEdge ( to_ed ), &arcLeft_n );
-
- if ( arcLeft_n > 1 )
- {
- exist = EdgeExist ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, node, &index );
-
- if ( !exist )
- {
- updateNode ( node, node1 );
- originalArc = getArcBetween ( from_ed, to_ed );
- edgeaddnumber += 2;
- addEdge ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, 2, originalArc->multiplicity * 10 );
- // if(overlaplen + step > maxk)
- {
- arcBuffer[0][arcBufferCount] = from_ed;
- arcBuffer[1][arcBufferCount] = num_ed_temp;
- arcBuffer[2][arcBufferCount++] = to_ed;
- }
- addEdge ( edge_array[getTwinEdge ( to_ed )].to_vt, edge_array[getTwinEdge ( from_ed )].from_vt, int_comp ( backup ), 0, originalArc->multiplicity * 10 );
- }
- else
- {
- // if(overlaplen + step > maxk)
- {
- arcBuffer[0][arcBufferCount] = from_ed;
- arcBuffer[1][arcBufferCount] = index;
- arcBuffer[2][arcBufferCount++] = to_ed;
- }
- }
- }
- }
-}
-
-//Add arc between two edges.
-static void add1Arc2 ( unsigned int from_ed, unsigned int to_ed, unsigned int weight )
-{
- if ( edge_array[from_ed].to_vt != edge_array[to_ed].from_vt )
- {
- fprintf ( stderr, "Warning : Inconsistant joins between %d and %d.\n", from_ed, to_ed );
- }
-
- unsigned int bal_fe = getTwinEdge ( from_ed );
- unsigned int bal_te = getTwinEdge ( to_ed );
-
- // fprintf(stderr, "from %u, bal %u\n", from_ed, bal_fe);
- // fprintf(stderr, "to %u, bal %u\n", to_ed, bal_te);
-
- if ( from_ed > num_ed_temp || to_ed > num_ed_temp || bal_fe > num_ed_temp || bal_te > num_ed_temp )
- {
- fprintf ( stderr, "Error : Edge id is out of range.\n" );
- return;
- }
-
- ARC * parc, *bal_parc;
- //both arcs already exist
- parc = getArcBetween ( from_ed, to_ed );
-
- if ( parc )
- {
- bal_parc = parc->bal_arc;
- parc->multiplicity += weight;
- bal_parc->multiplicity += weight;
- return;
- }
-
- //create new arcs
- parc = allocateArc ( to_ed );
- parc->multiplicity = weight;
- parc->prev = NULL;
-
- if ( edge_array[from_ed].arcs )
- { edge_array[from_ed].arcs->prev = parc; }
-
- parc->next = edge_array[from_ed].arcs;
- edge_array[from_ed].arcs = parc;
-
- // A->A'
- if ( bal_te == from_ed )
- {
- parc->bal_arc = parc;
- parc->multiplicity += weight;
- return;
- }
-
- bal_parc = allocateArc ( bal_fe );
- bal_parc->multiplicity = weight;
- bal_parc->prev = NULL;
-
- if ( edge_array[bal_te].arcs )
- { edge_array[bal_te].arcs->prev = bal_parc; }
+ 1. from : whether it's 'from kmer'
+ 2. from_ed : index of 'from edge'
+ 3. to_ed : index of 'to edge'
+ 4. node : node of 'last kmer' of 'from edge'
+ 5. node1 : node of 'front kmer' of 'to edge'
+ 6. maxk : max kmer
+Output:
+ None.
+Return:
+ None.
+*************************************************/
+void checkindegree ( int from, unsigned int from_ed, unsigned int to_ed, kmer_t2 *node, kmer_t2 *node1, int maxk )
+{
+    /*
+     * Decide whether the junction from_ed -> to_ed needs an explicit connecting
+     * edge, create that edge (and its twin) if it does not exist yet, and queue
+     * the triple (from_ed, connecting edge, to_ed) in arcBuffer so the arcs can
+     * be rewired later.
+     *
+     * from   : <= 2 counts the arcs of from_ed, otherwise the arcs of the twin of to_ed.
+     * node   : k-mer node passed to EdgeExist / updateNode for the 'from' side.
+     * node1  : k-mer node passed to updateNode for the 'to' side.
+     * maxk   : not used by this function.
+     */
+    int arcCount = 0;
+    boolean exist = false;
+    char ch = lastCharInKmer ( vt_array[edge_array[to_ed].from_vt].kmer ); //.low2 & 3;
+    char backup = lastCharInKmer ( KmerRightBitMove ( vt_array[edge_array[from_ed].to_vt].kmer, ( overlaplen - 1 ) << 1 ) ); //.low2 & 3;
+    unsigned int index;
+    unsigned int weight;
+    ARC *originalArc = NULL;
+
+    //out->in > 1
+    if ( from <= 2 )
+    {
+        arcCount2 ( from_ed, &arcCount );
+    }
+    else
+    {
+        arcCount2 ( getTwinEdge ( to_ed ), &arcCount );
+    }
+
+    if ( arcCount <= 1 )
+    {
+        return;
+    }
+
+    exist = EdgeExist ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, node, &index );
+
+    if ( exist )
+    {
+        // The connecting edge is already there: only queue the arc rewiring.
+        arcBuffer[0][arcBufferCount] = from_ed;
+        arcBuffer[1][arcBufferCount] = index;
+        arcBuffer[2][arcBufferCount++] = to_ed;
+        return;
+    }
+
+    updateNode ( node, node1 );
+    originalArc = getArcBetween ( from_ed, to_ed );
+
+    if ( !originalArc )
+    {
+        // Same fallback freshEdge uses: the arc may only be stored on the twin strand.
+        originalArc = getArcBetween ( getTwinEdge ( to_ed ), getTwinEdge ( from_ed ) );
+    }
+
+    if ( originalArc )
+    {
+        weight = originalArc->multiplicity * 10;
+    }
+    else
+    {
+        // No arc on either strand: use the default multiplicity (2) that freshEdge assumes.
+        fprintf ( stderr, "Warning : No arc found between %u and %u when adding an edge.\n", from_ed, to_ed );
+        weight = 2 * 10;
+    }
+
+    edgeaddnumber += 2;
+    addEdge ( edge_array[from_ed].to_vt, edge_array[to_ed].from_vt, ch, 2, weight );
+    // num_ed_temp is read after the first addEdge call, exactly as before;
+    // presumably it now holds the id of the edge just added -- confirm against addEdge.
+    arcBuffer[0][arcBufferCount] = from_ed;
+    arcBuffer[1][arcBufferCount] = num_ed_temp;
+    arcBuffer[2][arcBufferCount++] = to_ed;
+    addEdge ( edge_array[getTwinEdge ( to_ed )].to_vt, edge_array[getTwinEdge ( from_ed )].from_vt, int_comp ( backup ), 0, weight );
+}
- bal_parc->next = edge_array[bal_te].arcs;
- edge_array[bal_te].arcs = bal_parc;
- //link them to each other
- parc->bal_arc = bal_parc;
- bal_parc->bal_arc = parc;
+//Add arc between two edges.
+//Adds 'weight' to the arc from_ed -> to_ed and to its balancing arc
+//twin(to_ed) -> twin(from_ed), creating both arcs when they do not exist yet.
+static void add1Arc2 ( unsigned int from_ed, unsigned int to_ed, unsigned int weight )
+{
+    unsigned int bal_fe, bal_te;
+    ARC *parc, *bal_parc;
+
+    // Validate the ids before they are used to index edge_array.
+    if ( from_ed > num_ed_temp || to_ed > num_ed_temp )
+    {
+        fprintf ( stderr, "Error : Edge id is out of range.\n" );
+        return;
+    }
+
+    if ( edge_array[from_ed].to_vt != edge_array[to_ed].from_vt )
+    {
+        fprintf ( stderr, "Warning : Inconsistant joins between %u and %u.\n", from_ed, to_ed );
+    }
+
+    bal_fe = getTwinEdge ( from_ed );
+    bal_te = getTwinEdge ( to_ed );
+
+    // fprintf(stderr, "from %u, bal %u\n", from_ed, bal_fe);
+    // fprintf(stderr, "to %u, bal %u\n", to_ed, bal_te);
+
+    if ( bal_fe > num_ed_temp || bal_te > num_ed_temp )
+    {
+        fprintf ( stderr, "Error : Edge id is out of range.\n" );
+        return;
+    }
+
+    //both arcs already exist
+    parc = getArcBetween ( from_ed, to_ed );
+
+    if ( parc )
+    {
+        bal_parc = parc->bal_arc;
+        parc->multiplicity += weight;
+        bal_parc->multiplicity += weight;
+        return;
+    }
+
+    //create new arcs
+    parc = allocateArc ( to_ed );
+    parc->multiplicity = weight;
+    parc->prev = NULL;
+
+    // Push the new arc on the head of from_ed's arc list.
+    if ( edge_array[from_ed].arcs )
+    {
+        edge_array[from_ed].arcs->prev = parc;
+    }
+
+    parc->next = edge_array[from_ed].arcs;
+    edge_array[from_ed].arcs = parc;
+
+    // A->A' : the arc is its own balancing arc, so it receives the weight twice.
+    if ( bal_te == from_ed )
+    {
+        parc->bal_arc = parc;
+        parc->multiplicity += weight;
+        return;
+    }
+
+    bal_parc = allocateArc ( bal_fe );
+    bal_parc->multiplicity = weight;
+    bal_parc->prev = NULL;
+
+    // Push the balancing arc on the head of twin(to_ed)'s arc list.
+    if ( edge_array[bal_te].arcs )
+    {
+        edge_array[bal_te].arcs->prev = bal_parc;
+    }
+
+    bal_parc->next = edge_array[bal_te].arcs;
+    edge_array[bal_te].arcs = bal_parc;
+    //link them to each other
+    parc->bal_arc = bal_parc;
+    bal_parc->bal_arc = parc;
}
//Count step between two neighbour edges.
int countstep ( unsigned int to_vt, unsigned int from_vt )
{
- if ( to_vt == from_vt )
- {
- return 0;
- }
-
- Kmer to, from;
- Kmer filtertemp;
- to = vt_array[to_vt].kmer;
- from = vt_array[from_vt].kmer;
- int i = 0;
-
- for ( i = 0; i <= nowstep2; ++i )
- {
- filtertemp = createFilter ( overlaplen - i );
-
- if ( KmerEqual ( KmerRightBitMove ( from, i << 1 ), KmerAnd ( to, filtertemp ) ) )
- { return i; }
- }
-
- return -1;
+    // Returns the smallest shift s in [0, nowstep2] for which 'from' shifted right by s bases
+    // equals 'to' masked with createFilter ( overlaplen - s ); -1 when no such shift exists.
+    Kmer toKmer, fromKmer;
+    int shift;
+
+    if ( to_vt == from_vt )
+    {
+        return 0;
+    }
+
+    toKmer = vt_array[to_vt].kmer;
+    fromKmer = vt_array[from_vt].kmer;
+
+    for ( shift = 0; shift <= nowstep2; ++shift )
+    {
+        Kmer mask = createFilter ( overlaplen - shift );
+
+        if ( KmerEqual ( KmerRightBitMove ( fromKmer, shift << 1 ), KmerAnd ( toKmer, mask ) ) )
+        {
+            return shift;
+        }
+    }
+
+    return -1;
}
/*************************************************
@@ -1337,290 +1389,326 @@ Return:
*************************************************/
void freshEdge ( int maxk )
{
- unsigned int i = 0, j = 0;
- boolean found = 0;
- kmer_t2 * node, *node1;
- int count = 0;
- char ch = 0;
- Kmer word, bal_word;
- int from_vt_id = 0, to_vt_id = 0;
- char from_backup, to_backup;
- char * tightSeq = NULL;
- int bal_ed = 0;
- int arcLeft_n = 0, arcRight_n = 0;
- ARC * temparc = NULL;
- unsigned int tempto_ed = 0;
- fprintf ( stderr, "There are %d edge(s).\n", num_ed );
- // if(overlaplen + step > maxk)
- {
- arcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 3 );
- arcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- arcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- arcBuffer[2] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- }
- int count_noextend = 0;
- num_ed_temp = num_ed;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- { continue; }
-
- bal_ed = getTwinEdge ( i );
- arcCount2 ( i, &arcRight_n );
- arcCount2 ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n == 1 )
- {
- if ( edge_array[bal_ed].to_vt != edge_array[edge_array[bal_ed].arcs->to_ed].from_vt )
- {
- ch = lastCharInKmer ( KmerRightBitMove ( vt_array[edge_array[getTwinEdge ( edge_array[bal_ed].arcs->to_ed )].to_vt].kmer, ( overlaplen - 1 ) << 1 ) ); //.low2 & 3;
- int temp = kmer2edge ( 1, i, ch, &from_backup );
-
- if ( temp != 0 )
- { count_noextend++; }
- }
- }
- else if ( arcLeft_n > 1 )
- {
- word = vt_array[edge_array[i].from_vt].kmer;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- { found = search_kmerset2 ( KmerSetsNew, word, &node ); }
- else
- { found = search_kmerset2 ( KmerSetsNew, bal_word, &node ); }
-
- if ( !found )
- {
- fprintf ( stderr, "When refreshing edges, 'from vertex' is not found, to_vt %d kmer ", edge_array[i].from_vt );
- printKmerSeq ( stderr, vt_array[edge_array[i].from_vt].kmer );
- fprintf ( stderr, " .\n" );
- exit ( -1 );
- }
-
- // if(overlaplen < maxk)
- {
- temparc = edge_array[bal_ed].arcs;
-
- while ( temparc )
- {
- tempto_ed = getTwinEdge ( temparc->to_ed );
- word = vt_array[edge_array[tempto_ed].to_vt].kmer;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- { found = search_kmerset2 ( KmerSetsNew, word, &node1 ); }
- else
- { found = search_kmerset2 ( KmerSetsNew, bal_word, &node1 ); }
-
- if ( !found )
- {
- fprintf ( stderr, "When refreshing edges, 'to vertex' is not found, to_vt %d kmer ", edge_array[tempto_ed].to_vt );
- printKmerSeq ( stderr, vt_array[edge_array[tempto_ed].to_vt].kmer );
- fprintf ( stderr, " .\n" );
- exit ( -1 );
- }
-
- if ( node1 && node )
- { checkindegree ( 1, tempto_ed, i, node1, node, maxk ); }
-
- temparc = temparc->next;
- }
- }
- }
-
- if ( arcRight_n == 1 )
- {
- if ( edge_array[i].to_vt != edge_array[edge_array[i].arcs->to_ed].from_vt )
- {
- ch = lastCharInKmer ( vt_array[edge_array[edge_array[i].arcs->to_ed].from_vt].kmer ); //.low2 & 3;
- int temp = kmer2edge ( 3, i, ch, &to_backup );
-
- if ( temp != 0 )
- { count_noextend++; }
- }
- }
- else if ( arcRight_n > 1 )
- {
- word = vt_array[edge_array[i].to_vt].kmer;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- { found = search_kmerset2 ( KmerSetsNew, word, &node ); }
- else
- { found = search_kmerset2 ( KmerSetsNew, bal_word, &node ); }
-
- if ( !found )
- {
- fprintf ( stderr, "When refreshing edges, 'to vertex' is not found, to_vt %d kmer ", edge_array[i].to_vt );
- printKmerSeq ( stderr, vt_array[edge_array[i].to_vt].kmer );
- fprintf ( stderr, " .\n" );
- exit ( -1 );
- }
-
- // if(overlaplen < maxk)
- {
- temparc = edge_array[i].arcs;
-
- while ( temparc )
- {
- tempto_ed = temparc->to_ed;
- word = vt_array[edge_array[tempto_ed].from_vt].kmer;
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- { found = search_kmerset2 ( KmerSetsNew, word, &node1 ); }
- else
- { found = search_kmerset2 ( KmerSetsNew, bal_word, &node1 ); }
-
- if ( !found )
- {
- fprintf ( stderr, "When refreshing edges, 'from vertex' is not found, from_vt %d kmer ", edge_array[tempto_ed].from_vt );
- printKmerSeq ( stderr, vt_array[edge_array[tempto_ed].from_vt].kmer );
- fprintf ( stderr, " .\n" );
- exit ( -1 );
- }
-
- if ( node1 && node )
- { checkindegree ( 3, i, tempto_ed, node, node1, maxk ); }
-
- temparc = temparc->next;
- }
- }
- }
-
- //two edge change at the same time
- ++i;
- }
-
- if ( count_noextend )
- { fprintf ( stderr, "%d edge(s) not extended.\n", count_noextend ); }
-
- // if(overlaplen + step > maxk)
- {
- ARC * tempArc, *tempBalArc, *originalArc, *temp, *bal_temp;
- unsigned int from = 0;
- unsigned int mid = 0;
- unsigned int to = 0;
- int count_arcdelete = 0, count_arcadd = 0;
- int arcmulti = 0;
- int arcnotfound = 0;
-
- for ( i = 0; i < arcBufferCount; ++i )
- {
- from = arcBuffer[0][i];
- mid = arcBuffer[1][i];
- to = arcBuffer[2][i];
-
- if ( from > num_ed || mid > num_ed_temp || to > num_ed )
- {
- fprintf ( stderr, "Error : Edge id is out of range.\n" );
- exit ( -1 );
- }
-
- originalArc = getArcBetween ( from, to );
-
- if ( originalArc )
- {
- arcmulti = originalArc->multiplicity;
- count_arcdelete++;
- edge_array[from].arcs = deleteArc ( edge_array[from].arcs, originalArc );
- count_arcadd += 2;
- add1Arc2 ( from, mid, arcmulti );
- add1Arc2 ( mid, to, arcmulti );
- }
- else
- {
- originalArc = getArcBetween ( getTwinEdge ( to ), getTwinEdge ( from ) );
-
- if ( originalArc )
- {
- arcmulti = originalArc->multiplicity;
- count_arcdelete++;
- edge_array[getTwinEdge ( to )].arcs = deleteArc ( edge_array[getTwinEdge ( to )].arcs, originalArc );
- }
- else
- {
- ++arcnotfound;
- arcmulti = 2;
- }
-
- count_arcadd += 2;
- add1Arc2 ( from, mid, arcmulti );
- add1Arc2 ( mid, to, arcmulti );
- }
- }
-
- fprintf ( stderr, "Add edges to the graph: %d arc(s) deleted, %d arc(s) added.\n", count_arcdelete, count_arcadd );
-
- if ( arcnotfound )
- { fprintf ( stderr, "Warning : %d arc(s) are not found when checking.\n", arcnotfound ); }
-
- arcBufferCount = 0;
- }
- num_ed = num_ed_temp;
- // if(overlaplen + step > maxk)
- {
- free ( arcBuffer[2] );
- free ( arcBuffer[1] );
- free ( arcBuffer[0] );
- free ( arcBuffer );
- }
+ // Overview of this body:
+ // 1. allocate arcBuffer, three parallel arrays holding (from, mid, to) edge-id triples;
+ // 2. walk the edges and, per side, either call kmer2edge (exactly one arc) or
+ // checkindegree for every arc (more than one arc);
+ // 3. replay arcBuffer: replace each arc from->to by the two arcs from->mid and mid->to;
+ // 4. publish the new edge count (num_ed = num_ed_temp) and free arcBuffer.
+ // NOTE(review): j, count, from_vt_id, to_vt_id and tightSeq are never used in this body.
+ unsigned int i = 0, j = 0;
+ boolean found = 0;
+ kmer_t2 *node, *node1;
+ int count = 0;
+ char ch = 0;
+ Kmer word, bal_word;
+ int from_vt_id = 0, to_vt_id = 0;
+ char from_backup, to_backup;
+ char *tightSeq = NULL;
+ int bal_ed = 0;
+ int arcLeft_n = 0, arcRight_n = 0;
+ ARC *temparc = NULL;
+ unsigned int tempto_ed = 0;
+ fprintf ( stderr, "There are %d edge(s).\n", num_ed );
+ // if(overlaplen + step > maxk)
+ {
+ // Each of the three arrays has room for num_ed * 3 entries.
+ // NOTE(review): nothing in this body checks arcBufferCount against that capacity -- confirm it cannot be exceeded.
+ arcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 3 );
+ arcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ arcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ arcBuffer[2] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ }
+ int count_noextend = 0;
+ // num_ed_temp starts at the current edge count; num_ed itself is only updated after the arc replay below.
+ num_ed_temp = num_ed;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ // Deleted edges and edges that are their own twin are skipped (only the loop's ++i applies to them).
+ if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
+ {
+ continue;
+ }
+
+ // arcRight_n receives the arc count of edge i, arcLeft_n the arc count of its twin.
+ bal_ed = getTwinEdge ( i );
+ arcCount2 ( i, &arcRight_n );
+ arcCount2 ( bal_ed, &arcLeft_n );
+
+ // Left side, single arc: if the vertices on both sides of the arc differ, extend edge i via kmer2edge.
+ if ( arcLeft_n == 1 )
+ {
+ if ( edge_array[bal_ed].to_vt != edge_array[edge_array[bal_ed].arcs->to_ed].from_vt )
+ {
+ ch = lastCharInKmer ( KmerRightBitMove ( vt_array[edge_array[getTwinEdge ( edge_array[bal_ed].arcs->to_ed )].to_vt].kmer, ( overlaplen - 1 ) << 1 ) ); //.low2 & 3;
+ int temp = kmer2edge ( 1, i, ch, &from_backup );
+
+ // A non-zero result from kmer2edge is counted as "not extended".
+ if ( temp != 0 )
+ {
+ count_noextend++;
+ }
+ }
+ }
+ // Left side, several arcs: look up the node of edge i's 'from' k-mer, then check every incoming neighbour.
+ else if ( arcLeft_n > 1 )
+ {
+ word = vt_array[edge_array[i].from_vt].kmer;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ // The k-mer set is searched with whichever of word / bal_word KmerSmaller selects.
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset2 ( KmerSetsNew, word, &node );
+ }
+ else
+ {
+ found = search_kmerset2 ( KmerSetsNew, bal_word, &node );
+ }
+
+ if ( !found )
+ {
+ // NOTE(review): the message labels the printed id "to_vt" although it prints from_vt.
+ fprintf ( stderr, "When refreshing edges, 'from vertex' is not found, to_vt %d kmer ", edge_array[i].from_vt );
+ printKmerSeq ( stderr, vt_array[edge_array[i].from_vt].kmer );
+ fprintf ( stderr, " .\n" );
+ exit ( -1 );
+ }
+
+ // if(overlaplen < maxk)
+ {
+ // The arcs of the twin edge are walked; each target's twin is the edge that precedes edge i.
+ temparc = edge_array[bal_ed].arcs;
+
+ while ( temparc )
+ {
+ tempto_ed = getTwinEdge ( temparc->to_ed );
+ word = vt_array[edge_array[tempto_ed].to_vt].kmer;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset2 ( KmerSetsNew, word, &node1 );
+ }
+ else
+ {
+ found = search_kmerset2 ( KmerSetsNew, bal_word, &node1 );
+ }
+
+ if ( !found )
+ {
+ fprintf ( stderr, "When refreshing edges, 'to vertex' is not found, to_vt %d kmer ", edge_array[tempto_ed].to_vt );
+ printKmerSeq ( stderr, vt_array[edge_array[tempto_ed].to_vt].kmer );
+ fprintf ( stderr, " .\n" );
+ exit ( -1 );
+ }
+
+ // Here the neighbour is the 'from' edge and edge i is the 'to' edge.
+ if ( node1 && node )
+ {
+ checkindegree ( 1, tempto_ed, i, node1, node, maxk );
+ }
+
+ temparc = temparc->next;
+ }
+ }
+ }
+
+ // Right side, single arc: same treatment as the left side, using edge i's own arc.
+ if ( arcRight_n == 1 )
+ {
+ if ( edge_array[i].to_vt != edge_array[edge_array[i].arcs->to_ed].from_vt )
+ {
+ ch = lastCharInKmer ( vt_array[edge_array[edge_array[i].arcs->to_ed].from_vt].kmer ); //.low2 & 3;
+ int temp = kmer2edge ( 3, i, ch, &to_backup );
+
+ if ( temp != 0 )
+ {
+ count_noextend++;
+ }
+ }
+ }
+ // Right side, several arcs: look up the node of edge i's 'to' k-mer, then check every outgoing neighbour.
+ else if ( arcRight_n > 1 )
+ {
+ word = vt_array[edge_array[i].to_vt].kmer;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset2 ( KmerSetsNew, word, &node );
+ }
+ else
+ {
+ found = search_kmerset2 ( KmerSetsNew, bal_word, &node );
+ }
+
+ if ( !found )
+ {
+ fprintf ( stderr, "When refreshing edges, 'to vertex' is not found, to_vt %d kmer ", edge_array[i].to_vt );
+ printKmerSeq ( stderr, vt_array[edge_array[i].to_vt].kmer );
+ fprintf ( stderr, " .\n" );
+ exit ( -1 );
+ }
+
+ // if(overlaplen < maxk)
+ {
+ temparc = edge_array[i].arcs;
+
+ while ( temparc )
+ {
+ tempto_ed = temparc->to_ed;
+ word = vt_array[edge_array[tempto_ed].from_vt].kmer;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset2 ( KmerSetsNew, word, &node1 );
+ }
+ else
+ {
+ found = search_kmerset2 ( KmerSetsNew, bal_word, &node1 );
+ }
+
+ if ( !found )
+ {
+ fprintf ( stderr, "When refreshing edges, 'from vertex' is not found, from_vt %d kmer ", edge_array[tempto_ed].from_vt );
+ printKmerSeq ( stderr, vt_array[edge_array[tempto_ed].from_vt].kmer );
+ fprintf ( stderr, " .\n" );
+ exit ( -1 );
+ }
+
+ // Here edge i is the 'from' edge and the neighbour is the 'to' edge.
+ if ( node1 && node )
+ {
+ checkindegree ( 3, i, tempto_ed, node, node1, maxk );
+ }
+
+ temparc = temparc->next;
+ }
+ }
+ }
+
+ //two edge change at the same time
+ // Together with the loop's own ++i this advances by two, stepping over index i + 1.
+ ++i;
+ }
+
+ if ( count_noextend )
+ {
+ fprintf ( stderr, "%d edge(s) not extended.\n", count_noextend );
+ }
+
+ // if(overlaplen + step > maxk)
+ {
+ // Replay the queued triples: the arc from->to is removed and replaced by from->mid and mid->to.
+ // NOTE(review): tempArc, tempBalArc, temp and bal_temp are never used in this block.
+ ARC *tempArc, *tempBalArc, *originalArc, *temp, *bal_temp;
+ unsigned int from = 0;
+ unsigned int mid = 0;
+ unsigned int to = 0;
+ int count_arcdelete = 0, count_arcadd = 0;
+ int arcmulti = 0;
+ int arcnotfound = 0;
+
+ for ( i = 0; i < arcBufferCount; ++i )
+ {
+ from = arcBuffer[0][i];
+ mid = arcBuffer[1][i];
+ to = arcBuffer[2][i];
+
+ // from / to must be pre-existing edges (<= num_ed); mid may be a newly added edge (<= num_ed_temp).
+ if ( from > num_ed || mid > num_ed_temp || to > num_ed )
+ {
+ fprintf ( stderr, "Error : Edge id is out of range.\n" );
+ exit ( -1 );
+ }
+
+ originalArc = getArcBetween ( from, to );
+
+ if ( originalArc )
+ {
+ // The multiplicity is saved before deleteArc is called, then reused for both new arcs.
+ arcmulti = originalArc->multiplicity;
+ count_arcdelete++;
+ edge_array[from].arcs = deleteArc ( edge_array[from].arcs, originalArc );
+ count_arcadd += 2;
+ add1Arc2 ( from, mid, arcmulti );
+ add1Arc2 ( mid, to, arcmulti );
+ }
+ else
+ {
+ // Direct arc missing: try the arc on the opposite strand, twin(to) -> twin(from).
+ originalArc = getArcBetween ( getTwinEdge ( to ), getTwinEdge ( from ) );
+
+ if ( originalArc )
+ {
+ arcmulti = originalArc->multiplicity;
+ count_arcdelete++;
+ edge_array[getTwinEdge ( to )].arcs = deleteArc ( edge_array[getTwinEdge ( to )].arcs, originalArc );
+ }
+ else
+ {
+ // Neither arc exists: count it and fall back to a multiplicity of 2.
+ ++arcnotfound;
+ arcmulti = 2;
+ }
+
+ count_arcadd += 2;
+ add1Arc2 ( from, mid, arcmulti );
+ add1Arc2 ( mid, to, arcmulti );
+ }
+ }
+
+ fprintf ( stderr, "Add edges to the graph: %d arc(s) deleted, %d arc(s) added.\n", count_arcdelete, count_arcadd );
+
+ if ( arcnotfound )
+ {
+ fprintf ( stderr, "Warning : %d arc(s) are not found when checking.\n", arcnotfound );
+ }
+
+ arcBufferCount = 0;
+ }
+ // Publish the edge count accumulated in num_ed_temp.
+ num_ed = num_ed_temp;
+ // if(overlaplen + step > maxk)
+ {
+ free ( arcBuffer[2] );
+ free ( arcBuffer[1] );
+ free ( arcBuffer[0] );
+ free ( arcBuffer );
+ }
}
//Copy edge from source to target.
-void copy1Edge ( EDGE * source, EDGE * target )
+void copy1Edge ( EDGE *source, EDGE *target )
{
- target->from_vt = source->from_vt;
- target->to_vt = source->to_vt;
- target->length = source->length;
- target->cvg = source->cvg;
- target->multi = source->multi;
-
- if ( target->seq )
- {
- free ( ( void * ) target->seq );
- }
-
- target->seq = source->seq;
- source->seq = NULL;
- target->arcs = source->arcs;
- source->arcs = NULL;
- target->deleted = source->deleted;
+    // Release the sequence currently held by the target; free ( NULL ) is a no-op,
+    // so no guard is needed.
+    free ( ( void * ) target->seq );
+
+    // Move ownership of the sequence and the arc list from source to target.
+    target->seq = source->seq;
+    source->seq = NULL;
+    target->arcs = source->arcs;
+    source->arcs = NULL;
+
+    // Plain value fields.
+    target->from_vt = source->from_vt;
+    target->to_vt = source->to_vt;
+    target->length = source->length;
+    target->cvg = source->cvg;
+    target->multi = source->multi;
+    target->deleted = source->deleted;
}
//Check whether two bases are equal.
int BaseEqual ( char ch1, char ch2 )
{
- if ( ch1 == ch2 )
- { return 0; }
- else if ( ch1 > ch2 )
- { return 1; }
- else
- { return -1; }
+    // Three-way comparison: 0 when equal, 1 when ch1 > ch2, -1 when ch1 < ch2.
+    return ( ch1 > ch2 ) - ( ch1 < ch2 );
}
//Check whether two edges are equal.
int EdgeEqual ( unsigned int prev, unsigned int next )
{
- int i = 0;
- int length = edge_array[prev].length;
- char ch1, ch2;
- int equal = 0;
-
- for ( i = 0; i < length; ++i )
- {
- ch1 = int2base ( ( int ) getCharInTightString ( edge_array[prev].seq, i ) );
- ch2 = int2base ( ( int ) getCharInTightString ( edge_array[next].seq, i ) );
-
- if ( ( equal = BaseEqual ( ch1, ch2 ) ) )
- {
- return equal;
- }
- }
-
- return 0;
+    // Compares the two edge sequences base by base over the length of 'prev' and
+    // returns the first non-zero BaseEqual result, or 0 when all bases match.
+    const int len = edge_array[prev].length;
+    int pos;
+
+    for ( pos = 0; pos < len; ++pos )
+    {
+        char basePrev = int2base ( ( int ) getCharInTightString ( edge_array[prev].seq, pos ) );
+        char baseNext = int2base ( ( int ) getCharInTightString ( edge_array[next].seq, pos ) );
+        int order = BaseEqual ( basePrev, baseNext );
+
+        if ( order != 0 )
+        {
+            return order;
+        }
+    }
+
+    return 0;
}
/*************************************************
Function:
@@ -1636,114 +1724,118 @@ Return:
*************************************************/
void swapedge()
{
- unsigned int i;
- ARC * arc, *bal_arc, *temp_arc;
- int count_swap = 0, count_equal = 0;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- { continue; }
-
- if ( EdSmallerThanTwin ( i ) )
- {
- if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
- {
- count_swap++;
- copyEdge ( i, num_ed + 1 + 1 );
- copyEdge ( i + 1, num_ed + 1 );
- copyEdge ( num_ed + 1, i );
- copyEdge ( num_ed + 1 + 1, i + 1 );
- edge_array[i].bal_edge = 2;
- edge_array[i + 1].bal_edge = 0;
- //take care of the arcs
- arc = edge_array[i].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = i + 1;
- arc = arc->next;
- }
-
- arc = edge_array[i + 1].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = i;
- arc = arc->next;
- }
- }
- else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
- {
- int temp = EdgeEqual ( i, i + 1 );
-
- if ( temp == 0 )
- {
- count_equal++;
- edge_array[i].bal_edge = 1;
- delete1Edge ( i + 1 );
- //take care of the arcs
- arc = edge_array[i].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = i;
- arc = arc->next;
- }
-
- bal_arc = edge_array[i + 1].arcs;
- edge_array[i + 1].arcs = NULL;
-
- while ( bal_arc )
- {
- temp_arc = bal_arc;
- bal_arc = bal_arc->next;
-
- if ( edge_array[i].arcs )
- { edge_array[i].arcs->prev = temp_arc; }
-
- temp_arc->next = edge_array[i].arcs;
- edge_array[i].arcs = temp_arc;
- }
- }
- else if ( temp > 0 )
- {
- count_swap++;
- copyEdge ( i, num_ed + 1 + 1 );
- copyEdge ( i + 1, num_ed + 1 );
- copyEdge ( num_ed + 1, i );
- copyEdge ( num_ed + 1 + 1, i + 1 );
- edge_array[i].bal_edge = 2;
- edge_array[i + 1].bal_edge = 0;
- //take care of the arcs
- arc = edge_array[i].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = i + 1;
- arc = arc->next;
- }
-
- arc = edge_array[i + 1].arcs;
-
- while ( arc )
- {
- arc->bal_arc->to_ed = i;
- arc = arc->next;
- }
- }
- }
-
- ++i;
- }
- else
- {
- delete1Edge ( i );
- fprintf ( stderr, "Warning : Front edge %d is larger than %d.\n", i, i + 1 );
- }
- }
-
- fprintf ( stderr, "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal );
+    // For every edge that EdSmallerThanTwin accepts, compare it with the edge stored at i + 1:
+    //  - swap the two slots when edge i sorts after edge i + 1;
+    //  - merge them into one self-twin edge when their sequences are identical.
+    // Edges rejected by EdSmallerThanTwin are deleted with a warning.
+    unsigned int i;
+    ARC *arc, *bal_arc, *temp_arc;
+    int count_swap = 0, count_equal = 0;
+
+    for ( i = 1; i <= num_ed; ++i )
+    {
+        if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
+        {
+            continue;
+        }
+
+        if ( EdSmallerThanTwin ( i ) )
+        {
+            if ( KmerLarger ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
+            {
+                count_swap++;
+                // Swap slots i and i + 1 through the slots num_ed + 1 and num_ed + 2.
+                // NOTE(review): assumes edge_array has two spare slots past num_ed -- confirm.
+                copyEdge ( i, num_ed + 1 + 1 );
+                copyEdge ( i + 1, num_ed + 1 );
+                copyEdge ( num_ed + 1, i );
+                copyEdge ( num_ed + 1 + 1, i + 1 );
+                edge_array[i].bal_edge = 2;
+                edge_array[i + 1].bal_edge = 0;
+                //take care of the arcs
+                arc = edge_array[i].arcs;
+
+                while ( arc )
+                {
+                    arc->bal_arc->to_ed = i + 1;
+                    arc = arc->next;
+                }
+
+                arc = edge_array[i + 1].arcs;
+
+                while ( arc )
+                {
+                    arc->bal_arc->to_ed = i;
+                    arc = arc->next;
+                }
+            }
+            else if ( KmerEqual ( vt_array[edge_array[i].from_vt].kmer, vt_array[edge_array[i + 1].from_vt].kmer ) )
+            {
+                int temp = EdgeEqual ( i, i + 1 );
+
+                if ( temp == 0 )
+                {
+                    // Identical sequences: keep edge i as its own twin and drop edge i + 1.
+                    count_equal++;
+                    edge_array[i].bal_edge = 1;
+                    delete1Edge ( i + 1 );
+                    //take care of the arcs
+                    arc = edge_array[i].arcs;
+
+                    while ( arc )
+                    {
+                        arc->bal_arc->to_ed = i;
+                        arc = arc->next;
+                    }
+
+                    // Move every arc of edge i + 1 onto the head of edge i's list.
+                    bal_arc = edge_array[i + 1].arcs;
+                    edge_array[i + 1].arcs = NULL;
+
+                    while ( bal_arc )
+                    {
+                        temp_arc = bal_arc;
+                        bal_arc = bal_arc->next;
+
+                        if ( edge_array[i].arcs )
+                        {
+                            edge_array[i].arcs->prev = temp_arc;
+                        }
+
+                        // The moved arc becomes the new head, so it must not keep the
+                        // 'prev' link it had in edge i + 1's list.
+                        temp_arc->prev = NULL;
+                        temp_arc->next = edge_array[i].arcs;
+                        edge_array[i].arcs = temp_arc;
+                    }
+                }
+                else if ( temp > 0 )
+                {
+                    // Same first k-mer but edge i's sequence sorts later: swap as above.
+                    count_swap++;
+                    copyEdge ( i, num_ed + 1 + 1 );
+                    copyEdge ( i + 1, num_ed + 1 );
+                    copyEdge ( num_ed + 1, i );
+                    copyEdge ( num_ed + 1 + 1, i + 1 );
+                    edge_array[i].bal_edge = 2;
+                    edge_array[i + 1].bal_edge = 0;
+                    //take care of the arcs
+                    arc = edge_array[i].arcs;
+
+                    while ( arc )
+                    {
+                        arc->bal_arc->to_ed = i + 1;
+                        arc = arc->next;
+                    }
+
+                    arc = edge_array[i + 1].arcs;
+
+                    while ( arc )
+                    {
+                        arc->bal_arc->to_ed = i;
+                        arc = arc->next;
+                    }
+                }
+            }
+
+            // Step over the edge at i + 1; the loop header adds the second increment.
+            ++i;
+        }
+        else
+        {
+            delete1Edge ( i );
+            fprintf ( stderr, "Warning : Front edge %u is larger than %u.\n", i, i + 1 );
+        }
+    }
+
+    fprintf ( stderr, "%d none-palindrome edge(s) swapped, %d palindrome edge(s) processed.\n", count_swap, count_equal );
}
/*************************************************
Function:
@@ -1760,91 +1852,99 @@ Return:
-1 if a smaller than b.
0 if a equal to b.
*************************************************/
-static int cmp_seq ( const void * a, const void * b )
+static int cmp_seq ( const void *a, const void *b )
{
- EDGE_SUB * A, *B;
- A = ( EDGE_SUB * ) a;
- B = ( EDGE_SUB * ) b;
-
- if ( KmerLarger ( vt_array[A->from_vt].kmer, vt_array[B->from_vt].kmer ) )
- {
- return 1;
- }
- else if ( KmerSmaller ( vt_array[A->from_vt].kmer , vt_array[B->from_vt].kmer ) )
- {
- return -1;
- }
- else
- {
- if ( A->seq[0] > B->seq[0] )
- {
- return 1;
- }
- else if ( A->seq[0] == B->seq[0] )
- {
- int i = 0;
-
- for ( i = 1; i < A->length && i < B->length; i++ )
- {
- if ( getCharInTightString ( A->seq, i ) > getCharInTightString ( B->seq, i ) )
- { return 1; }
- else if ( getCharInTightString ( A->seq, i ) < getCharInTightString ( B->seq, i ) )
- { return -1; }
- }
-
- if ( i == A->length && i < B->length )
- { return -1; }
- else if ( i < A->length && i == B->length )
- { return 1; }
- else
- {
- printKmerSeq ( stderr , vt_array[A->from_vt].kmer );
- fprintf ( stderr , "\n" );
- printKmerSeq ( stderr , vt_array[B->from_vt].kmer );
- fprintf ( stderr , "\n" );
-
- for ( i = 0; i < A->length; i++ )
- {
- fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( A->seq, i ) ) );
- }
-
- fprintf ( stderr , "\n" );
-
- for ( i = 0; i < B->length; i++ )
- {
- fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( B->seq, i ) ) );
- }
-
- fprintf ( stderr , "\n" );
- fprintf ( stderr, "cmp_seq:\terr\n" );
- exit ( 0 );
- return 0;
- }
- }
- else
- {
- return -1;
- }
- }
+    // qsort-style comparator for EDGE_SUB records: order by the k-mer of from_vt first,
+    // then by sequence content, then by length. Two records that compare fully equal are
+    // treated as a fatal error: both are dumped to stderr and the process exits.
+    const EDGE_SUB *A = ( const EDGE_SUB * ) a;
+    const EDGE_SUB *B = ( const EDGE_SUB * ) b;
+    int i = 0;
+
+    if ( KmerLarger ( vt_array[A->from_vt].kmer, vt_array[B->from_vt].kmer ) )
+    {
+        return 1;
+    }
+
+    if ( KmerSmaller ( vt_array[A->from_vt].kmer , vt_array[B->from_vt].kmer ) )
+    {
+        return -1;
+    }
+
+    // Same starting k-mer: compare the first packed byte of the sequences as a whole.
+    if ( A->seq[0] > B->seq[0] )
+    {
+        return 1;
+    }
+
+    if ( A->seq[0] != B->seq[0] )
+    {
+        return -1;
+    }
+
+    // First byte equal: compare base by base over the common length.
+    for ( i = 1; i < A->length && i < B->length; i++ )
+    {
+        if ( getCharInTightString ( A->seq, i ) > getCharInTightString ( B->seq, i ) )
+        {
+            return 1;
+        }
+        else if ( getCharInTightString ( A->seq, i ) < getCharInTightString ( B->seq, i ) )
+        {
+            return -1;
+        }
+    }
+
+    // One sequence is a proper prefix of the other: the shorter one sorts first.
+    if ( i == A->length && i < B->length )
+    {
+        return -1;
+    }
+
+    if ( i < A->length && i == B->length )
+    {
+        return 1;
+    }
+
+    // Indistinguishable records: dump both and abort.
+    printKmerSeq ( stderr , vt_array[A->from_vt].kmer );
+    fprintf ( stderr , "\n" );
+    printKmerSeq ( stderr , vt_array[B->from_vt].kmer );
+    fprintf ( stderr , "\n" );
+
+    for ( i = 0; i < A->length; i++ )
+    {
+        fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( A->seq, i ) ) );
+    }
+
+    fprintf ( stderr , "\n" );
+
+    for ( i = 0; i < B->length; i++ )
+    {
+        fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( B->seq, i ) ) );
+    }
+
+    fprintf ( stderr , "\n" );
+    fprintf ( stderr, "cmp_seq:\terr\n" );
+    // This is an error exit, so report failure to the caller of the program
+    // (the other fatal paths in this file use exit ( -1 ) as well).
+    exit ( -1 );
+    return 0;
}
//Copy edge from source to target.
-static void copyOneEdge ( EDGE * target , EDGE * source )
+static void copyOneEdge ( EDGE *target , EDGE *source )
{
- target->from_vt = source->from_vt;
- target->to_vt = source->to_vt;
- target->length = source->length;
- target->cvg = source->cvg;
- target->multi = source->multi;
- target->flag = source->flag;
- target->bal_edge = source->bal_edge;
- target->seq = source->seq;
- source->seq = NULL;
- target->arcs = source->arcs;
- source->arcs = NULL ;
- target->markers = source->markers;
- source->markers = NULL;
- target->deleted = source->deleted;
+ target->from_vt = source->from_vt;
+ target->to_vt = source->to_vt;
+ target->length = source->length;
+ target->cvg = source->cvg;
+ target->multi = source->multi;
+ target->flag = source->flag;
+ target->bal_edge = source->bal_edge;
+ target->seq = source->seq;
+ source->seq = NULL;
+ target->arcs = source->arcs;
+ source->arcs = NULL ;
+ target->markers = source->markers;
+ source->markers = NULL;
+ target->deleted = source->deleted;
}
/*************************************************
@@ -1861,13 +1961,13 @@ Return:
*************************************************/
static void updateArcToEd ( unsigned int ed_index )
{
- ARC * arc = edge_array[ed_index].arcs;
+ ARC *arc = edge_array[ed_index].arcs;
- while ( arc )
- {
- arc->to_ed = index_array[arc->to_ed];
- arc = arc->next;
- }
+ while ( arc )
+ {
+ arc->to_ed = index_array[arc->to_ed];
+ arc = arc->next;
+ }
}
/*************************************************
@@ -1885,92 +1985,99 @@ Return:
///*
void sortedge()
{
- unsigned int index ;
- EDGE_SUB * sort_edge;
- sort_edge = ( EDGE_SUB * ) ckalloc ( sizeof ( EDGE_SUB ) * ( num_ed + 1 ) );
- unsigned int i = 1;
-
- for ( index = 1 ; index <= num_ed ; index ++ )
- {
- sort_edge[i].from_vt = edge_array[index].from_vt;
- sort_edge[i].seq = edge_array[index].seq;
- sort_edge[i].to_vt = index; // record old id
- sort_edge[i].length = edge_array[index].length;
- i++;
-
- if ( !EdSameAsTwin ( index ) )
- {
- index++;
- }
- }
-
- qsort ( & ( sort_edge[1] ), i - 1, sizeof ( sort_edge[1] ), cmp_seq );
- index_array = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * ( num_ed + 1 ) ); // used to record new id
- unsigned int new_index = 1, old_index;
-
- for ( index = 1; index <= i - 1; index++ )
- {
- old_index = sort_edge[index].to_vt; // old id
- sort_edge[index].seq = NULL;
- index_array[old_index] = new_index++;// old id -> new id
-
- if ( !EdSameAsTwin ( old_index ) )
- {
- index_array[old_index + 1] = new_index++; // old id -> new id
- }
- }
-
- bool * copy_array = (bool * ) ckalloc ( sizeof ( bool ) * ( num_ed + 1 ) );
- EDGE *old_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) );
- EDGE *new_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) );
- unsigned int next_index;
- for ( index = 1; index <= num_ed; index++ )
- {
- if(!copy_array[index])
- {
- next_index = index;
- new_index = index_array[next_index];
- if(!copy_array[next_index])// && next_index != new_index
- {
- if(copy_array[new_index])
- {
- fprintf(stderr, "Copy error: never reach here.");
- }
- copy_array[next_index] = 1;
- if(next_index != new_index)
- {
- copyOneEdge (old_edge, &(edge_array[new_index]));
- copyOneEdge ( & ( edge_array[new_index] ), & ( edge_array[next_index] ) );
- }
- updateArcToEd ( new_index );
-
- next_index = new_index;
- new_index = index_array[next_index];
- while(!copy_array[next_index])
- {
- if(next_index == new_index)
- {
- fprintf(stderr, "Index error: never reach here.");
- }
- copy_array[next_index] = 1;
- copyOneEdge (new_edge, &(edge_array[new_index]));
- copyOneEdge ( & ( edge_array[new_index] ), old_edge);
- updateArcToEd ( new_index );
- copyOneEdge (old_edge, new_edge);
-
- next_index = new_index;
- new_index = index_array[next_index];
- }
- }
- }
- }
-
- free (copy_array);
- free (old_edge);
- free (new_edge);
- free ( index_array );
- free ( sort_edge );
- fprintf(stderr, "%d edge(s) sorted.\n", num_ed);
+ unsigned int index ;
+ EDGE_SUB *sort_edge;
+ sort_edge = ( EDGE_SUB * ) ckalloc ( sizeof ( EDGE_SUB ) * ( num_ed + 1 ) );
+ unsigned int i = 1;
+
+ for ( index = 1 ; index <= num_ed ; index ++ )
+ {
+ sort_edge[i].from_vt = edge_array[index].from_vt;
+ sort_edge[i].seq = edge_array[index].seq;
+ sort_edge[i].to_vt = index; // record old id
+ sort_edge[i].length = edge_array[index].length;
+ i++;
+
+ if ( !EdSameAsTwin ( index ) )
+ {
+ index++;
+ }
+ }
+
+ qsort ( & ( sort_edge[1] ), i - 1, sizeof ( sort_edge[1] ), cmp_seq );
+ index_array = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * ( num_ed + 1 ) ); // used to record new id
+ unsigned int new_index = 1, old_index;
+
+ for ( index = 1; index <= i - 1; index++ )
+ {
+ old_index = sort_edge[index].to_vt; // old id
+ sort_edge[index].seq = NULL;
+ index_array[old_index] = new_index++;// old id -> new id
+
+ if ( !EdSameAsTwin ( old_index ) )
+ {
+ index_array[old_index + 1] = new_index++; // old id -> new id
+ }
+ }
+
+ bool *copy_array = (bool * ) ckalloc ( sizeof ( bool ) * ( num_ed + 1 ) );
+ EDGE *old_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) );
+ EDGE *new_edge = ( EDGE * ) ckalloc ( sizeof ( EDGE ) );
+ unsigned int next_index;
+
+ for ( index = 1; index <= num_ed; index++ )
+ {
+ if(!copy_array[index])
+ {
+ next_index = index;
+ new_index = index_array[next_index];
+
+ if(!copy_array[next_index])// && next_index != new_index
+ {
+ if(copy_array[new_index])
+ {
+ fprintf(stderr, "Copy error: never reach here.");
+ }
+
+ copy_array[next_index] = 1;
+
+ if(next_index != new_index)
+ {
+ copyOneEdge (old_edge, &(edge_array[new_index]));
+ copyOneEdge ( & ( edge_array[new_index] ), & ( edge_array[next_index] ) );
+ }
+
+ updateArcToEd ( new_index );
+
+ next_index = new_index;
+ new_index = index_array[next_index];
+
+ while(!copy_array[next_index])
+ {
+ if(next_index == new_index)
+ {
+ fprintf(stderr, "Index error: never reach here.");
+ }
+
+ copy_array[next_index] = 1;
+ copyOneEdge (new_edge, &(edge_array[new_index]));
+ copyOneEdge ( & ( edge_array[new_index] ), old_edge);
+ updateArcToEd ( new_index );
+ copyOneEdge (old_edge, new_edge);
+
+ next_index = new_index;
+ new_index = index_array[next_index];
+ }
+ }
+ }
+ }
+
+ free (copy_array);
+ free (old_edge);
+ free (new_edge);
+ free ( index_array );
+ free ( sort_edge );
+ fprintf(stderr, "%d edge(s) sorted.\n", num_ed);
}
//*/
/*
@@ -2042,89 +2149,93 @@ Return:
*************************************************/
void delete0Edge()
{
- unsigned int i = 0;
- ARC * arc_left, *arc_right;
- arcBufferCount = 0;
- arcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 3 );
- arcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- arcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- arcBuffer[2] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- { continue; }
-
- if ( edge_array[i].length == 0 )
- {
- arc_left = edge_array[i + 1].arcs;
-
- while ( arc_left )
- {
- arc_right = edge_array[i].arcs;
-
- while ( arc_right )
- {
- arcBuffer[0][arcBufferCount] = getTwinEdge ( arc_left->to_ed );
- arcBuffer[1][arcBufferCount] = ( arc_left->multiplicity + arc_right->multiplicity + 1 ) / 2;
- arcBuffer[2][arcBufferCount++] = arc_right->to_ed;
- arc_right = arc_right->next;
- }
-
- arc_left = arc_left->next;
- }
- }
-
- ++i;
- }
-
- unsigned int from = 0;
- unsigned int multi = 0;
- unsigned int to = 0;
- int count_edgedelete = 0, count_arcadd = 0;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- { continue; }
-
- if ( edge_array[i].length == 0 )
- {
- destroyEdge2 ( i );
- count_edgedelete += 2;
- }
- }
-
- removeDeadArcs2();
-
- for ( i = 0; i < arcBufferCount; ++i )
- {
- from = arcBuffer[0][i];
- multi = arcBuffer[1][i];
- to = arcBuffer[2][i];
-
- if ( from == 0 || to == 0 )
- {
- fprintf ( stderr, "Error : Edge id is zero.\n" );
- continue;
- }
-
- if ( from > num_ed || to > num_ed )
- {
- fprintf ( stderr, "Error : Edge id is out of range.\n" );
- continue;
- }
-
- count_arcadd++;
- add1Arc2 ( from, to, multi );
- }
-
- arcBufferCount = 0;
- fprintf ( stderr, "%d edge(s) in length of 0, %d arc(s) added.\n", count_edgedelete, count_arcadd );
- free ( arcBuffer[0] );
- free ( arcBuffer[1] );
- free ( arcBuffer[2] );
- free ( arcBuffer );
+ unsigned int i = 0;
+ ARC *arc_left, *arc_right;
+ arcBufferCount = 0;
+ arcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 3 );
+ arcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ arcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ arcBuffer[2] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length == 0 )
+ {
+ arc_left = edge_array[i + 1].arcs;
+
+ while ( arc_left )
+ {
+ arc_right = edge_array[i].arcs;
+
+ while ( arc_right )
+ {
+ arcBuffer[0][arcBufferCount] = getTwinEdge ( arc_left->to_ed );
+ arcBuffer[1][arcBufferCount] = ( arc_left->multiplicity + arc_right->multiplicity + 1 ) / 2;
+ arcBuffer[2][arcBufferCount++] = arc_right->to_ed;
+ arc_right = arc_right->next;
+ }
+
+ arc_left = arc_left->next;
+ }
+ }
+
+ ++i;
+ }
+
+ unsigned int from = 0;
+ unsigned int multi = 0;
+ unsigned int to = 0;
+ int count_edgedelete = 0, count_arcadd = 0;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length == 0 )
+ {
+ destroyEdge2 ( i );
+ count_edgedelete += 2;
+ }
+ }
+
+ removeDeadArcs2();
+
+ for ( i = 0; i < arcBufferCount; ++i )
+ {
+ from = arcBuffer[0][i];
+ multi = arcBuffer[1][i];
+ to = arcBuffer[2][i];
+
+ if ( from == 0 || to == 0 )
+ {
+ fprintf ( stderr, "Error : Edge id is zero.\n" );
+ continue;
+ }
+
+ if ( from > num_ed || to > num_ed )
+ {
+ fprintf ( stderr, "Error : Edge id is out of range.\n" );
+ continue;
+ }
+
+ count_arcadd++;
+ add1Arc2 ( from, to, multi );
+ }
+
+ arcBufferCount = 0;
+ fprintf ( stderr, "%d edge(s) in length of 0, %d arc(s) added.\n", count_edgedelete, count_arcadd );
+ free ( arcBuffer[0] );
+ free ( arcBuffer[1] );
+ free ( arcBuffer[2] );
+ free ( arcBuffer );
}
/*************************************************
@@ -2143,28 +2254,28 @@ Return:
*************************************************/
void fresh ( int maxk )
{
- int num = 0;
- ARC * arc_temp, *parc;
- newfoundcount = 0;
- newnotfoundcount = 0;
- edgeaddnumber = 0;
- freshEdge ( maxk );
- fprintf ( stderr, "Refresh edge: %lld edge(s) added.\n", edgeaddnumber );
-
- if ( newnotfoundcount )
- {
- fprintf ( stderr, "Refresh edge: %d kmer(s) found.\n", newfoundcount );
- fprintf ( stderr, "Refresh edge: %d kmer(s) not found.\n", newnotfoundcount );
- }
-
- if ( overlaplen + step > maxk )
- {
- delete0Edge();
- }
-
- //swap the smaller one forward
- swapedge();
- compactEdgeArray();
+ int num = 0;
+ ARC *arc_temp, *parc;
+ newfoundcount = 0;
+ newnotfoundcount = 0;
+ edgeaddnumber = 0;
+ freshEdge ( maxk );
+ fprintf ( stderr, "Refresh edge: %lld edge(s) added.\n", edgeaddnumber );
+
+ if ( newnotfoundcount )
+ {
+ fprintf ( stderr, "Refresh edge: %d kmer(s) found.\n", newfoundcount );
+ fprintf ( stderr, "Refresh edge: %d kmer(s) not found.\n", newnotfoundcount );
+ }
+
+ if ( overlaplen + step > maxk )
+ {
+ delete0Edge();
+ }
+
+ //swap the smaller one forward
+ swapedge();
+ compactEdgeArray();
}
/*************************************************
@@ -2180,73 +2291,73 @@ Output:
Return:
None.
*************************************************/
-void statistics ( EDGE * ed_array, unsigned int ed_num )
+void statistics ( EDGE *ed_array, unsigned int ed_num )
{
- unsigned int i = 0;
- unsigned int * length_array;
- int flag, count, len_c;
- long long sum = 0, N90, N50;
- int signI;
- length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
- //first scan for number counting
- count = len_c = 0;
-
- for ( i = 1; i <= ed_num; i++ )
- {
- if ( ( ed_array[i].length + overlaplen - 1 ) >= len_bar )
- {
- length_array[len_c++] = ed_array[i].length + overlaplen - 1;
- }
-
- if ( ed_array[i].length < 1 || ed_array[i].deleted )
- {
- continue;
- }
-
- count++;
-
- if ( EdSmallerThanTwin ( i ) )
- {
- i++;
- }
- }
-
- sum = 0;
-
- for ( signI = len_c - 1; signI >= 0; signI-- )
- {
- sum += length_array[signI];
- }
-
- if ( len_c > 0 )
- {
- fprintf ( stderr, "\nThere are %d contig(s) longer than %d, sum up %lld bp, with average length %lld.\n", len_c, len_bar, sum, sum / len_c );
- }
-
- qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );
- fprintf ( stderr, "The longest length is %d bp, ", length_array[len_c - 1] );
- N50 = sum * 0.5;
- N90 = sum * 0.9;
- sum = flag = 0;
-
- for ( signI = len_c - 1; signI >= 0; signI-- )
- {
- sum += length_array[signI];
-
- if ( !flag && sum >= N50 )
- {
- fprintf ( stderr, "contig N50 is %d bp, ", length_array[signI] );
- flag = 1;
- }
-
- if ( sum >= N90 )
- {
- fprintf ( stderr, "contig N90 is %d bp.\n", length_array[signI] );
- break;
- }
- }
-
- free ( ( void * ) length_array );
+ unsigned int i = 0;
+ unsigned int *length_array;
+ int flag, count, len_c;
+ long long sum = 0, N90, N50;
+ int signI;
+ length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
+ //first scan for number counting
+ count = len_c = 0;
+
+ for ( i = 1; i <= ed_num; i++ )
+ {
+ if ( ( ed_array[i].length + overlaplen - 1 ) >= len_bar )
+ {
+ length_array[len_c++] = ed_array[i].length + overlaplen - 1;
+ }
+
+ if ( ed_array[i].length < 1 || ed_array[i].deleted )
+ {
+ continue;
+ }
+
+ count++;
+
+ if ( EdSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ sum = 0;
+
+ for ( signI = len_c - 1; signI >= 0; signI-- )
+ {
+ sum += length_array[signI];
+ }
+
+ if ( len_c > 0 )
+ {
+ fprintf ( stderr, "\nThere are %d contig(s) longer than %d, sum up %lld bp, with average length %lld.\n", len_c, len_bar, sum, sum / len_c );
+ }
+
+ qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );
+ fprintf ( stderr, "The longest length is %d bp, ", length_array[len_c - 1] );
+ N50 = sum * 0.5;
+ N90 = sum * 0.9;
+ sum = flag = 0;
+
+ for ( signI = len_c - 1; signI >= 0; signI-- )
+ {
+ sum += length_array[signI];
+
+ if ( !flag && sum >= N50 )
+ {
+ fprintf ( stderr, "contig N50 is %d bp, ", length_array[signI] );
+ flag = 1;
+ }
+
+ if ( sum >= N90 )
+ {
+ fprintf ( stderr, "contig N90 is %d bp.\n", length_array[signI] );
+ break;
+ }
+ }
+
+ free ( ( void * ) length_array );
}
/*************************************************
@@ -2261,80 +2372,87 @@ Output:
Return:
Sorted arcs list.
*************************************************/
-ARC * sort_arc ( ARC * list )
+ARC *sort_arc ( ARC *list )
{
- if ( !list )
- { return list; }
-
-// ARC * head = ( ARC * ) malloc ( sizeof ( ARC ) );
- ARC * head = ( ARC * ) ckalloc ( sizeof ( ARC ));
- head->next = list;
- list->prev = head;
- ARC * curr = list;
- ARC * temp = list;
- ARC * temp1 = NULL;
-
- while ( curr )
- {
- temp = curr;
-
- if ( temp )
- {
- temp1 = temp->next;
-
- while ( temp1 )
- {
- if ( temp->to_ed > temp1->to_ed )
- { temp = temp1; }
-
- temp1 = temp1->next;
- }
- }
-
- if ( temp && temp != curr )
- {
- if ( temp->next )
- {
- temp->prev->next = temp->next;
- temp->next->prev = temp->prev;
- }
- else
- {
- temp->prev->next = NULL;
- }
-
- temp->next = curr;
- temp->prev = curr->prev;
- curr->prev->next = temp;
- curr->prev = temp;
- }
- else
- {
- curr = curr->next;
- }
- }
-
- list = head->next;
- list->prev = NULL;
- head->next = NULL;
- free ( head );
- return list;
+ if ( !list )
+ {
+ return list;
+ }
+
+ // ARC * head = ( ARC * ) malloc ( sizeof ( ARC ) );
+ ARC *head = ( ARC * ) ckalloc ( sizeof ( ARC ));
+ head->next = list;
+ list->prev = head;
+ ARC *curr = list;
+ ARC *temp = list;
+ ARC *temp1 = NULL;
+
+ while ( curr )
+ {
+ temp = curr;
+
+ if ( temp )
+ {
+ temp1 = temp->next;
+
+ while ( temp1 )
+ {
+ if ( temp->to_ed > temp1->to_ed )
+ {
+ temp = temp1;
+ }
+
+ temp1 = temp1->next;
+ }
+ }
+
+ if ( temp && temp != curr )
+ {
+ if ( temp->next )
+ {
+ temp->prev->next = temp->next;
+ temp->next->prev = temp->prev;
+ }
+ else
+ {
+ temp->prev->next = NULL;
+ }
+
+ temp->next = curr;
+ temp->prev = curr->prev;
+ curr->prev->next = temp;
+ curr->prev = temp;
+ }
+ else
+ {
+ curr = curr->next;
+ }
+ }
+
+ list = head->next;
+ list->prev = NULL;
+ head->next = NULL;
+ free ( head );
+ return list;
}
//Sort disorder arcs causing by multi thread.
void freshArc()
{
- unsigned int i;
- ARC * arc_temp, *parc;
+ unsigned int i;
+ ARC *arc_temp, *parc;
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- { continue; }
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
- edge_array[i].arcs = sort_arc ( edge_array[i].arcs );
- }
- fprintf(stderr, "Arcs sorted.\n");
+ edge_array[i].arcs = sort_arc ( edge_array[i].arcs );
+ }
+
+ fprintf(stderr, "Arcs sorted.\n");
}
/*************************************************
@@ -2351,34 +2469,34 @@ Return:
*************************************************/
static void deleteUnlikeArc()
{
- unsigned int i, bal;
- ARC * arc_temp;
- int count = 0;
-
- for ( i = 0; i < delarcBufferCount; ++i )
- {
- arc_temp = getArcBetween ( delarcBuffer[0][i], delarcBuffer[1][i] );
-
- if ( arc_temp )
- {
- edge_array[delarcBuffer[0][i]].arcs = deleteArc ( edge_array[delarcBuffer[0][i]].arcs, arc_temp );
- ++count;
- }
-
- arc_temp = getArcBetween ( getTwinEdge ( delarcBuffer[1][i] ), getTwinEdge ( delarcBuffer[0][i] ) );
-
- if ( arc_temp )
- {
- edge_array[getTwinEdge ( delarcBuffer[1][i] )].arcs = deleteArc ( edge_array[getTwinEdge ( delarcBuffer[1][i] )].arcs, arc_temp );
- ++count;
- }
- }
-
- fprintf ( stderr, "%d unreliable arc(s) deleted.\n", count );
- free ( delarcBuffer[0] );
- free ( delarcBuffer[1] );
- free ( delarcBuffer );
- delarcBufferCount = 0;
+ unsigned int i, bal;
+ ARC *arc_temp;
+ int count = 0;
+
+ for ( i = 0; i < delarcBufferCount; ++i )
+ {
+ arc_temp = getArcBetween ( delarcBuffer[0][i], delarcBuffer[1][i] );
+
+ if ( arc_temp )
+ {
+ edge_array[delarcBuffer[0][i]].arcs = deleteArc ( edge_array[delarcBuffer[0][i]].arcs, arc_temp );
+ ++count;
+ }
+
+ arc_temp = getArcBetween ( getTwinEdge ( delarcBuffer[1][i] ), getTwinEdge ( delarcBuffer[0][i] ) );
+
+ if ( arc_temp )
+ {
+ edge_array[getTwinEdge ( delarcBuffer[1][i] )].arcs = deleteArc ( edge_array[getTwinEdge ( delarcBuffer[1][i] )].arcs, arc_temp );
+ ++count;
+ }
+ }
+
+ fprintf ( stderr, "%d unreliable arc(s) deleted.\n", count );
+ free ( delarcBuffer[0] );
+ free ( delarcBuffer[1] );
+ free ( delarcBuffer );
+ delarcBufferCount = 0;
}
/*************************************************
@@ -2396,32 +2514,38 @@ Return:
*************************************************/
static void forward ( unsigned int index, int first )
{
- ARC * fArc, *temp;
- fArc = edge_array[index].arcs;
- unsigned int twin = getTwinEdge ( index );
- // if(!EdSameAsTwin(index))
- {
- if ( edge_array[index].multi != 1 )
- { edge_array[index].multi = 2; }
-
- if ( edge_array[twin].multi != 1 )
- { edge_array[twin].multi = 2; }
- }
- edge_array[index].flag = 1;
- edge_array[twin].flag = 1;
-
- while ( fArc )
- {
- temp = fArc;
- fArc = fArc->next;
- delarcBuffer[0][delarcBufferCount] = index;
- delarcBuffer[1][delarcBufferCount++] = temp->to_ed;
-
- if ( edge_array[temp->to_ed].flag )
- { continue; }
-
- forward ( getTwinEdge ( temp->to_ed ), 0 );
- }
+ ARC *fArc, *temp;
+ fArc = edge_array[index].arcs;
+ unsigned int twin = getTwinEdge ( index );
+ // if(!EdSameAsTwin(index))
+ {
+ if ( edge_array[index].multi != 1 )
+ {
+ edge_array[index].multi = 2;
+ }
+
+ if ( edge_array[twin].multi != 1 )
+ {
+ edge_array[twin].multi = 2;
+ }
+ }
+ edge_array[index].flag = 1;
+ edge_array[twin].flag = 1;
+
+ while ( fArc )
+ {
+ temp = fArc;
+ fArc = fArc->next;
+ delarcBuffer[0][delarcBufferCount] = index;
+ delarcBuffer[1][delarcBufferCount++] = temp->to_ed;
+
+ if ( edge_array[temp->to_ed].flag )
+ {
+ continue;
+ }
+
+ forward ( getTwinEdge ( temp->to_ed ), 0 );
+ }
}
/*************************************************
@@ -2438,54 +2562,58 @@ Return:
*************************************************/
static void getUnlikeArc()
{
- unsigned int i, bal;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- { continue; }
-
- if ( EdSameAsTwin ( i ) )
- {
- edge_array[i].multi = 1;
- }
- }
-
- delarcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 2 );
- delarcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- delarcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
- unsigned int last = 0, curr = 0;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- if ( edge_array[i].deleted )
- { continue; }
-
- if ( edge_array[i].multi == 1 )
- {
- last = delarcBufferCount;
- forward ( i, 1 );
-
- if ( !EdSameAsTwin ( i ) )
- {
- forward ( getTwinEdge ( i ), 1 );
- ++i;
- }
-
- curr = delarcBufferCount;
- unsigned int j;
- edge_array[i].flag = 0;
- edge_array[getTwinEdge ( i )].flag = 0;
-
- for ( j = last; j < curr; ++j )
- {
- edge_array[delarcBuffer[0][j]].flag = 0;
- edge_array[getTwinEdge ( delarcBuffer[0][j] )].flag = 0;
- edge_array[delarcBuffer[1][j]].flag = 0;
- edge_array[getTwinEdge ( delarcBuffer[1][j] )].flag = 0;
- }
- }
- }
+ unsigned int i, bal;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( EdSameAsTwin ( i ) )
+ {
+ edge_array[i].multi = 1;
+ }
+ }
+
+ delarcBuffer = ( unsigned int ** ) ckalloc ( sizeof ( unsigned int * ) * 2 );
+ delarcBuffer[0] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ delarcBuffer[1] = ( unsigned int * ) ckalloc ( sizeof ( unsigned int ) * num_ed * 3 );
+ unsigned int last = 0, curr = 0;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].multi == 1 )
+ {
+ last = delarcBufferCount;
+ forward ( i, 1 );
+
+ if ( !EdSameAsTwin ( i ) )
+ {
+ forward ( getTwinEdge ( i ), 1 );
+ ++i;
+ }
+
+ curr = delarcBufferCount;
+ unsigned int j;
+ edge_array[i].flag = 0;
+ edge_array[getTwinEdge ( i )].flag = 0;
+
+ for ( j = last; j < curr; ++j )
+ {
+ edge_array[delarcBuffer[0][j]].flag = 0;
+ edge_array[getTwinEdge ( delarcBuffer[0][j] )].flag = 0;
+ edge_array[delarcBuffer[1][j]].flag = 0;
+ edge_array[getTwinEdge ( delarcBuffer[1][j] )].flag = 0;
+ }
+ }
+ }
}
/*************************************************
@@ -2507,100 +2635,100 @@ Output:
Return:
None.
*************************************************/
-void Iterate ( char * libfile, char * graph, int maxk, int M ) //boolean keepReadFile,
+void Iterate ( char *libfile, char *graph, int maxk, int M ) //boolean keepReadFile,
{
- time_t start_t, stop_t, time_bef, time_aft, inner_start, inner_stop;
- time ( &start_t );
- unsigned int i;
-
- for ( i = 1; i <= num_ed; ++i )
- {
- edge_array[i].multi = 0;
- }
-
- int cutlen = 2 * overlaplen;
- int mink = overlaplen;
- overlaplen += step;
- nowstep2 = step;
- int flag = 0;
- statistics ( edge_array, num_ed );
- fprintf ( stderr, "\nIteration start.\n" );
- int round = 1;
-
- while ( overlaplen <= maxk )
- {
- unsigned int j;
- time ( &inner_start );
- WORDFILTER = createFilter ( overlaplen );
- fprintf ( stderr, "\n***************************\n" );
- fprintf ( stderr, "Iteration %d, kmer: %d\n", round++, overlaplen );
- fprintf ( stderr, "Edge number: %d\n", num_ed );
- time ( &time_bef );
- //build (k+1)mer graph
- fprintf ( stderr, "Construct %dmer graph.\n", overlaplen );
- buildGraphHash();
- time ( &time_aft );
- fprintf ( stderr, "Time spent on building hash graph: %ds.\n", ( int ) ( time_aft - time_bef ) );
- time ( &time_bef );
- //add arcs for (k+1)mer graph
- fprintf ( stderr, "\nAdd arcs to graph.\n" );
- addArc ( libfile, graph, flag, maxk - overlaplen, maxk ); //, keepReadFile
- //get arcs that could be processed incorrectly
- getUnlikeArc();
- //delete this arcs
- deleteUnlikeArc();
- flag++;
- time ( &time_aft );
- fprintf ( stderr, "Time spent on adding arcs: %ds.\n", ( int ) ( time_aft - time_bef ) );
- time ( &time_bef );
- //sort disorder arcs causing by multi thread
- fprintf ( stderr, "Sort arcs.\n" );
- freshArc();
- time ( &time_aft );
- fprintf ( stderr, "Time spent on sorting arcs: %ds.\n", ( int ) ( time_aft - time_bef ) );
-
- if ( deLowEdge )
- {
- time ( &time_bef );
- fprintf ( stderr, "\nRemove weak edges and low coverage edges.\n" );
- removeWeakEdges2 ( cutlen, 1, mink );
- removeLowCovEdges2 ( cutlen, deLowEdge, mink, 0 );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on removing Edges: %ds\n", ( int ) ( time_aft - time_bef ) );
- }
-
- if ( overlaplen + step > maxk )
- {
- time ( &time_bef );
- fprintf ( stderr, "Cut tips of the graph.\n" );
- cutTipsInGraph2 ( cutlen, 0, 0 );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on cutting tips: %ds.\n", ( int ) ( time_aft - time_bef ) );
- }
-
- time ( &time_bef );
- fprintf ( stderr, "Refresh edges.\n" );
- //refresh to extend edge and get the edge order right
- fresh ( maxk );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on refreshing edges: %ds.\n", ( int ) ( time_aft - time_bef ) );
- //free kmer set
- free_kmerset2 ( KmerSetsNew );
- overlaplen += step;
- nowstep2 += step;
- statistics ( edge_array, num_ed );
- time ( &inner_stop );
- fprintf ( stderr, "Time spent on this round: %dm.\n\n", ( int ) ( inner_stop - inner_start ) / 60 );
- }
-
- for ( i = 1; i <= num_ed; ++i )
- {
- edge_array[i].multi = 0;
- }
-
- overlaplen = maxk;
- time ( &stop_t );
- fprintf ( stderr, "Iteration finished.\n" );
- fprintf ( stderr, "Time spent on iteration: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
+ time_t start_t, stop_t, time_bef, time_aft, inner_start, inner_stop;
+ time ( &start_t );
+ unsigned int i;
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ edge_array[i].multi = 0;
+ }
+
+ int cutlen = 2 * overlaplen;
+ int mink = overlaplen;
+ overlaplen += step;
+ nowstep2 = step;
+ int flag = 0;
+ statistics ( edge_array, num_ed );
+ fprintf ( stderr, "\nIteration start.\n" );
+ int round = 1;
+
+ while ( overlaplen <= maxk )
+ {
+ unsigned int j;
+ time ( &inner_start );
+ WORDFILTER = createFilter ( overlaplen );
+ fprintf ( stderr, "\n***************************\n" );
+ fprintf ( stderr, "Iteration %d, kmer: %d\n", round++, overlaplen );
+ fprintf ( stderr, "Edge number: %d\n", num_ed );
+ time ( &time_bef );
+ //build (k+1)mer graph
+ fprintf ( stderr, "Construct %dmer graph.\n", overlaplen );
+ buildGraphHash();
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on building hash graph: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ time ( &time_bef );
+ //add arcs for (k+1)mer graph
+ fprintf ( stderr, "\nAdd arcs to graph.\n" );
+ addArc ( libfile, graph, flag, maxk - overlaplen, maxk ); //, keepReadFile
+ //get arcs that could be processed incorrectly
+ getUnlikeArc();
+ //delete this arcs
+ deleteUnlikeArc();
+ flag++;
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on adding arcs: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ time ( &time_bef );
+ //sort disorder arcs causing by multi thread
+ fprintf ( stderr, "Sort arcs.\n" );
+ freshArc();
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on sorting arcs: %ds.\n", ( int ) ( time_aft - time_bef ) );
+
+ if ( deLowEdge )
+ {
+ time ( &time_bef );
+ fprintf ( stderr, "\nRemove weak edges and low coverage edges.\n" );
+ removeWeakEdges2 ( cutlen, 1, mink );
+ removeLowCovEdges2 ( cutlen, deLowEdge, mink, 0 );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on removing Edges: %ds\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ if ( overlaplen + step > maxk )
+ {
+ time ( &time_bef );
+ fprintf ( stderr, "Cut tips of the graph.\n" );
+ cutTipsInGraph2 ( cutlen, 0, 0 );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on cutting tips: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ time ( &time_bef );
+ fprintf ( stderr, "Refresh edges.\n" );
+ //refresh to extend edge and get the edge order right
+ fresh ( maxk );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on refreshing edges: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ //free kmer set
+ free_kmerset2 ( KmerSetsNew );
+ overlaplen += step;
+ nowstep2 += step;
+ statistics ( edge_array, num_ed );
+ time ( &inner_stop );
+ fprintf ( stderr, "Time spent on this round: %dm.\n\n", ( int ) ( inner_stop - inner_start ) / 60 );
+ }
+
+ for ( i = 1; i <= num_ed; ++i )
+ {
+ edge_array[i].multi = 0;
+ }
+
+ overlaplen = maxk;
+ time ( &stop_t );
+ fprintf ( stderr, "Iteration finished.\n" );
+ fprintf ( stderr, "Time spent on iteration: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
}
diff --git a/standardPregraph/kmer.c b/standardPregraph/kmer.c
index 8a80ca4..954d8bf 100644
--- a/standardPregraph/kmer.c
+++ b/standardPregraph/kmer.c
@@ -1,7 +1,7 @@
/*
* kmer.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,9 +27,9 @@
#include "extvab.h"
#ifdef MER127
-void PrintKmer ( FILE * fp, Kmer kmer )
+void PrintKmer ( FILE *fp, Kmer kmer )
{
- fprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
+ fprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
}
/*************************************************
@@ -47,28 +47,28 @@ Return:
*************************************************/
boolean KmerSmaller ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.high1 != kmer2.high1 )
- {
- return ( kmer1.high1 < kmer2.high1 );
- }
- else
- {
- if ( kmer1.low1 != kmer2.low1 )
- {
- return ( kmer1.low1 < kmer2.low1 );
- }
- else
- {
- if ( kmer1.high2 != kmer2.high2 )
- {
- return ( kmer1.high2 < kmer2.high2 );
- }
- else
- {
- return ( kmer1.low2 < kmer2.low2 );
- }
- }
- }
+ if ( kmer1.high1 != kmer2.high1 )
+ {
+ return ( kmer1.high1 < kmer2.high1 );
+ }
+ else
+ {
+ if ( kmer1.low1 != kmer2.low1 )
+ {
+ return ( kmer1.low1 < kmer2.low1 );
+ }
+ else
+ {
+ if ( kmer1.high2 != kmer2.high2 )
+ {
+ return ( kmer1.high2 < kmer2.high2 );
+ }
+ else
+ {
+ return ( kmer1.low2 < kmer2.low2 );
+ }
+ }
+ }
}
/*************************************************
@@ -86,28 +86,28 @@ Return:
*************************************************/
boolean KmerLarger ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.high1 != kmer2.high1 )
- {
- return ( kmer1.high1 > kmer2.high1 );
- }
- else
- {
- if ( kmer1.low1 != kmer2.low1 )
- {
- return ( kmer1.low1 > kmer2.low1 );
- }
- else
- {
- if ( kmer1.high2 != kmer2.high2 )
- {
- return ( kmer1.high2 > kmer2.high2 );
- }
- else
- {
- return ( kmer1.low2 > kmer2.low2 );
- }
- }
- }
+ if ( kmer1.high1 != kmer2.high1 )
+ {
+ return ( kmer1.high1 > kmer2.high1 );
+ }
+ else
+ {
+ if ( kmer1.low1 != kmer2.low1 )
+ {
+ return ( kmer1.low1 > kmer2.low1 );
+ }
+ else
+ {
+ if ( kmer1.high2 != kmer2.high2 )
+ {
+ return ( kmer1.high2 > kmer2.high2 );
+ }
+ else
+ {
+ return ( kmer1.low2 > kmer2.low2 );
+ }
+ }
+ }
}
/*************************************************
@@ -125,14 +125,14 @@ Return:
*************************************************/
boolean KmerEqual ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.low2 != kmer2.low2 || kmer1.high2 != kmer2.high2 || kmer1.low1 != kmer2.low1 || kmer1.high1 != kmer2.high1 )
- {
- return 0;
- }
- else
- {
- return 1;
- }
+ if ( kmer1.low2 != kmer2.low2 || kmer1.high2 != kmer2.high2 || kmer1.low1 != kmer2.low1 || kmer1.high1 != kmer2.high1 )
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
/*************************************************
@@ -150,11 +150,11 @@ Return:
*************************************************/
Kmer KmerAnd ( Kmer kmer1, Kmer kmer2 )
{
- kmer1.high1 &= kmer2.high1;
- kmer1.low1 &= kmer2.low1;
- kmer1.high2 &= kmer2.high2;
- kmer1.low2 &= kmer2.low2;
- return kmer1;
+ kmer1.high1 &= kmer2.high1;
+ kmer1.low1 &= kmer2.low1;
+ kmer1.high2 &= kmer2.high2;
+ kmer1.low2 &= kmer2.low2;
+ return kmer1;
}
/*************************************************
@@ -171,11 +171,11 @@ Return:
*************************************************/
Kmer KmerLeftBitMoveBy2 ( Kmer word )
{
- word.high1 = ( word.high1 << 2 ) | ( word.low1 >> 62 );
- word.low1 = ( word.low1 << 2 ) | ( word.high2 >> 62 );
- word.high2 = ( word.high2 << 2 ) | ( word.low2 >> 62 );
- word.low2 <<= 2;
- return word;
+ word.high1 = ( word.high1 << 2 ) | ( word.low1 >> 62 );
+ word.low1 = ( word.low1 << 2 ) | ( word.high2 >> 62 );
+ word.high2 = ( word.high2 << 2 ) | ( word.low2 >> 62 );
+ word.low2 <<= 2;
+ return word;
}
/*************************************************
@@ -192,11 +192,11 @@ Return:
*************************************************/
Kmer KmerRightBitMoveBy2 ( Kmer word )
{
- word.low2 = ( word.low2 >> 2 ) | ( word.high2 & 0x3 ) << 62;
- word.high2 = ( word.high2 >> 2 ) | ( word.low1 & 0x3 ) << 62;
- word.low1 = ( word.low1 >> 2 ) | ( word.high1 & 0x3 ) << 62;
- word.high1 >>= 2;
- return word;
+ word.low2 = ( word.low2 >> 2 ) | ( word.high2 & 0x3 ) << 62;
+ word.high2 = ( word.high2 >> 2 ) | ( word.low1 & 0x3 ) << 62;
+ word.low1 = ( word.low1 >> 2 ) | ( word.high1 & 0x3 ) << 62;
+ word.high1 >>= 2;
+ return word;
}
/*************************************************
@@ -214,9 +214,9 @@ Return:
*************************************************/
Kmer KmerPlus ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word.low2 |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word.low2 |= ch;
+ return word;
}
/*************************************************
@@ -234,10 +234,10 @@ Return:
*************************************************/
Kmer nextKmer ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word = KmerAnd ( word, WORDFILTER );
- word.low2 |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word = KmerAnd ( word, WORDFILTER );
+ word.low2 |= ch;
+ return word;
}
/*************************************************
@@ -255,25 +255,28 @@ Return:
*************************************************/
Kmer prevKmer ( Kmer next, char ch )
{
- Kmer word = KmerRightBitMoveBy2 ( next );
+ Kmer word = KmerRightBitMoveBy2 ( next );
- switch ( overlaplen )
- {
- case 1 ... 32:
- word.low2 |= ( ( ( ubyte8 ) ch ) << 2 * ( overlaplen - 1 ) );
- break;
- case 33 ... 64:
- word.high2 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 64 );
- break;
- case 65 ... 96:
- word.low1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 128 );
- break;
- case 97 ... 128:
- word.high1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 192 );
- break;
- }
+ switch ( overlaplen )
+ {
+ case 1 ... 32:
+ word.low2 |= ( ( ( ubyte8 ) ch ) << 2 * ( overlaplen - 1 ) );
+ break;
- return word;
+ case 33 ... 64:
+ word.high2 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 64 );
+ break;
+
+ case 65 ... 96:
+ word.low1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 128 );
+ break;
+
+ case 97 ... 128:
+ word.high1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 192 );
+ break;
+ }
+
+ return word;
}
/*************************************************
@@ -290,7 +293,7 @@ Return:
*************************************************/
char lastCharInKmer ( Kmer kmer )
{
- return ( char ) ( kmer.low2 & 0x3 );
+ return ( char ) ( kmer.low2 & 0x3 );
}
/*************************************************
@@ -307,21 +310,24 @@ Return:
*************************************************/
char firstCharInKmer ( Kmer kmer )
{
- switch ( overlaplen )
- {
- case 1 ... 32:
- kmer.low2 >>= 2 * ( overlaplen - 1 );
- return kmer.low2; // & 3;
- case 33 ... 64:
- kmer.high2 >>= 2 * ( overlaplen - 1 ) - 64;
- return kmer.high2; // & 3;
- case 65 ... 96:
- kmer.low1 >>= 2 * ( overlaplen - 1 ) - 128;
- return kmer.low1;
- case 97 ... 128:
- kmer.high1 >>= 2 * ( overlaplen - 1 ) - 192;
- return kmer.high1;
- }
+ switch ( overlaplen )
+ {
+ case 1 ... 32:
+ kmer.low2 >>= 2 * ( overlaplen - 1 );
+ return kmer.low2; // & 3;
+
+ case 33 ... 64:
+ kmer.high2 >>= 2 * ( overlaplen - 1 ) - 64;
+ return kmer.high2; // & 3;
+
+ case 65 ... 96:
+ kmer.low1 >>= 2 * ( overlaplen - 1 ) - 128;
+ return kmer.low1;
+
+ case 97 ... 128:
+ kmer.high1 >>= 2 * ( overlaplen - 1 ) - 192;
+ return kmer.high1;
+ }
}
/*************************************************
@@ -338,29 +344,32 @@ Return:
*************************************************/
Kmer createFilter ( int overlaplen )
{
- Kmer word;
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
-
- switch ( overlaplen )
- {
- case 1 ... 31:
- word.low2 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen ) ) - 1;
- break;
- case 32 ... 63:
- word.low2 = ~word.low2;
- word.high2 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 64 ) ) - 1;
- break;
- case 64 ... 95:
- word.high2 = word.low2 = ~word.low2;
- word.low1 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 128 ) ) - 1;
- break;
- case 96 ... 127:
- word.low1 = word.high2 = word.low2 = ~word.low2;
- word.high1 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 192 ) ) - 1;
- break;
- }
-
- return word;
+ Kmer word;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
+
+ switch ( overlaplen )
+ {
+ case 1 ... 31:
+ word.low2 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen ) ) - 1;
+ break;
+
+ case 32 ... 63:
+ word.low2 = ~word.low2;
+ word.high2 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 64 ) ) - 1;
+ break;
+
+ case 64 ... 95:
+ word.high2 = word.low2 = ~word.low2;
+ word.low1 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 128 ) ) - 1;
+ break;
+
+ case 96 ... 127:
+ word.low1 = word.high2 = word.low2 = ~word.low2;
+ word.high1 = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 192 ) ) - 1;
+ break;
+ }
+
+ return word;
}
/*************************************************
@@ -378,35 +387,38 @@ Return:
*************************************************/
Kmer KmerRightBitMove ( Kmer word, int dis )
{
- ubyte8 mask;
-
- switch ( dis )
- {
- case 0 ... 63:
- mask = ( ( ( ubyte8 ) 1 ) << dis ) - 1;
- word.low2 = ( word.low2 >> dis ) | ( word.high2 & mask ) << ( 64 - dis );
- word.high2 = ( word.high2 >> dis ) | ( word.low1 & mask ) << ( 64 - dis );
- word.low1 = ( word.low1 >> dis ) | ( word.high1 & mask ) << ( 64 - dis );
- word.high1 >>= dis;
- return word;
- case 64 ... 127:
- mask = ( ( ( ubyte8 ) 1 ) << ( dis - 64 ) ) - 1;
- word.low2 = word.high2 >> ( dis - 64 ) | ( word.low1 & mask ) << ( 128 - dis );
- word.high2 = word.low1 >> ( dis - 64 ) | ( word.high1 & mask ) << ( 128 - dis );
- word.low1 = word.high1 >> ( dis - 64 );
- word.high1 = 0;
- return word;
- case 128 ... 191:
- mask = ( ( ( ubyte8 ) 1 ) << ( dis - 128 ) ) - 1;
- word.low2 = word.low1 >> ( dis - 128 ) | ( word.high1 & mask ) << ( 192 - dis );
- word.high2 = word.high1 >> ( dis - 128 );
- word.high1 = word.low1 = 0;
- return word;
- case 192 ... 255:
- word.low2 = word.high1 >> ( dis - 192 );
- word.high1 = word.low1 = word.high2 = 0;
- return word;
- }
+ ubyte8 mask;
+
+ switch ( dis )
+ {
+ case 0 ... 63:
+ mask = ( ( ( ubyte8 ) 1 ) << dis ) - 1;
+ word.low2 = ( word.low2 >> dis ) | ( word.high2 & mask ) << ( 64 - dis );
+ word.high2 = ( word.high2 >> dis ) | ( word.low1 & mask ) << ( 64 - dis );
+ word.low1 = ( word.low1 >> dis ) | ( word.high1 & mask ) << ( 64 - dis );
+ word.high1 >>= dis;
+ return word;
+
+ case 64 ... 127:
+ mask = ( ( ( ubyte8 ) 1 ) << ( dis - 64 ) ) - 1;
+ word.low2 = word.high2 >> ( dis - 64 ) | ( word.low1 & mask ) << ( 128 - dis );
+ word.high2 = word.low1 >> ( dis - 64 ) | ( word.high1 & mask ) << ( 128 - dis );
+ word.low1 = word.high1 >> ( dis - 64 );
+ word.high1 = 0;
+ return word;
+
+ case 128 ... 191:
+ mask = ( ( ( ubyte8 ) 1 ) << ( dis - 128 ) ) - 1;
+ word.low2 = word.low1 >> ( dis - 128 ) | ( word.high1 & mask ) << ( 192 - dis );
+ word.high2 = word.high1 >> ( dis - 128 );
+ word.high1 = word.low1 = 0;
+ return word;
+
+ case 192 ... 255:
+ word.low2 = word.high1 >> ( dis - 192 );
+ word.high1 = word.low1 = word.high2 = 0;
+ return word;
+ }
}
/*************************************************
@@ -422,71 +434,74 @@ Output:
Return:
None.
*************************************************/
-void printKmerSeq ( FILE * fp, Kmer kmer )
-{
- int i, bit1, bit2, bit3, bit4;
- bit4 = bit3 = bit2 = bit1 = 0;
- char kmerSeq[128];
-
- switch ( overlaplen )
- {
- case 1 ... 31:
- bit4 = overlaplen;
- break;
- case 32 ... 63:
- bit4 = 32;
- bit3 = overlaplen - 32;
- break;
- case 64 ... 95:
- bit4 = bit3 = 32;
- bit2 = overlaplen - 64;
- break;
- case 96 ... 127:
- bit4 = bit3 = bit2 = 32;
- bit1 = overlaplen - 96;
- break;
- }
-
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- kmerSeq[i] = kmer.high1 & 0x3;
- kmer.high1 >>= 2;
- }
-
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- kmerSeq[i + bit1] = kmer.low1 & 0x3;
- kmer.low1 >>= 2;
- }
-
- for ( i = bit3 - 1; i >= 0; i-- )
- {
- kmerSeq[i + bit1 + bit2] = kmer.high2 & 0x3;
- kmer.high2 >>= 2;
- }
-
- for ( i = bit4 - 1; i >= 0; i-- )
- {
- kmerSeq[i + bit1 + bit2 + bit3] = kmer.low2 & 0x3;
- kmer.low2 >>= 2;
- }
-
- for ( i = 0; i < overlaplen; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
- }
-}
-
-void print_kmer ( FILE * fp, Kmer kmer, char c )
-{
- fprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
- fprintf ( fp, "%c", c );
-}
-
-void print_kmer_gz ( gzFile * fp, Kmer kmer, char c )
-{
- gzprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
- gzprintf ( fp, "%c", c );
+void printKmerSeq ( FILE *fp, Kmer kmer )
+{
+ int i, bit1, bit2, bit3, bit4;
+ bit4 = bit3 = bit2 = bit1 = 0;
+ char kmerSeq[128];
+
+ switch ( overlaplen )
+ {
+ case 1 ... 31:
+ bit4 = overlaplen;
+ break;
+
+ case 32 ... 63:
+ bit4 = 32;
+ bit3 = overlaplen - 32;
+ break;
+
+ case 64 ... 95:
+ bit4 = bit3 = 32;
+ bit2 = overlaplen - 64;
+ break;
+
+ case 96 ... 127:
+ bit4 = bit3 = bit2 = 32;
+ bit1 = overlaplen - 96;
+ break;
+ }
+
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ kmerSeq[i] = kmer.high1 & 0x3;
+ kmer.high1 >>= 2;
+ }
+
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ kmerSeq[i + bit1] = kmer.low1 & 0x3;
+ kmer.low1 >>= 2;
+ }
+
+ for ( i = bit3 - 1; i >= 0; i-- )
+ {
+ kmerSeq[i + bit1 + bit2] = kmer.high2 & 0x3;
+ kmer.high2 >>= 2;
+ }
+
+ for ( i = bit4 - 1; i >= 0; i-- )
+ {
+ kmerSeq[i + bit1 + bit2 + bit3] = kmer.low2 & 0x3;
+ kmer.low2 >>= 2;
+ }
+
+ for ( i = 0; i < overlaplen; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
+ }
+}
+
+void print_kmer ( FILE *fp, Kmer kmer, char c )
+{
+ fprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
+ fprintf ( fp, "%c", c );
+}
+
+void print_kmer_gz ( gzFile *fp, Kmer kmer, char c )
+{
+ gzprintf ( fp, "%llx %llx %llx %llx", kmer.high1, kmer.low1, kmer.high2, kmer.low2 );
+ gzprintf ( fp, "%c", c );
}
static const ubyte2 BitReverseTable[65536] =
@@ -498,7 +513,7 @@ static const ubyte2 BitReverseTable[65536] =
# define R10(n) R8(n), R8(n + 1*64 ), R8(n + 2*64 ), R8(n + 3*64 )
# define R12(n) R10(n),R10(n + 1*16), R10(n + 2*16 ), R10(n + 3*16 )
# define R14(n) R12(n),R12(n + 1*4 ), R12(n + 2*4 ), R12(n + 3*4 )
- R14 ( 0 ), R14 ( 1 ), R14 ( 2 ), R14 ( 3 )
+ R14 ( 0 ), R14 ( 1 ), R14 ( 2 ), R14 ( 3 )
};
/*************************************************
@@ -516,327 +531,327 @@ Return:
*************************************************/
static Kmer fastReverseComp ( Kmer seq, char seq_size )
{
- seq.low2 ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.low2 = ( ( ubyte8 ) BitReverseTable[seq.low2 & 0xffff] << 48 ) |
- ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 48 ) & 0xffff] );
-
- if ( seq_size < 32 )
- {
- seq.low2 >>= ( 64 - ( seq_size << 1 ) );
- return seq;
- }
-
- seq.high2 ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.high2 = ( ( ubyte8 ) BitReverseTable[seq.high2 & 0xffff] << 48 ) |
- ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 48 ) & 0xffff] );
-
- if ( seq_size < 64 )
- {
- seq.high2 = seq.high2 ^ seq.low2;
- seq.low2 = seq.high2 ^ seq.low2;
- seq.high2 = seq.high2 ^ seq.low2;
- seq = KmerRightBitMove ( seq, 128 - ( seq_size << 1 ) );
- return seq;
- }
-
- seq.low1 ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.low1 = ( ( ubyte8 ) BitReverseTable[seq.low1 & 0xffff] << 48 ) |
- ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 48 ) & 0xffff] );
-
- if ( seq_size < 96 )
- {
- seq.low1 = seq.low1 ^ seq.low2;
- seq.low2 = seq.low1 ^ seq.low2;
- seq.low1 = seq.low1 ^ seq.low2;
- seq = KmerRightBitMove ( seq, 192 - ( seq_size << 1 ) );
- return seq;
- }
-
- seq.high1 ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.high1 = ( ( ubyte8 ) BitReverseTable[seq.high1 & 0xffff] << 48 ) |
- ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 48 ) & 0xffff] );
- seq.low1 = seq.low1 ^ seq.high2;
- seq.high2 = seq.low1 ^ seq.high2;
- seq.low1 = seq.low1 ^ seq.high2;
- seq.low2 = seq.low2 ^ seq.high1;
- seq.high1 = seq.low2 ^ seq.high1;
- seq.low2 = seq.low2 ^ seq.high1;
- seq = KmerRightBitMove ( seq, 256 - ( seq_size << 1 ) );
- return seq;
+ seq.low2 ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.low2 = ( ( ubyte8 ) BitReverseTable[seq.low2 & 0xffff] << 48 ) |
+ ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low2 >> 48 ) & 0xffff] );
+
+ if ( seq_size < 32 )
+ {
+ seq.low2 >>= ( 64 - ( seq_size << 1 ) );
+ return seq;
+ }
+
+ seq.high2 ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.high2 = ( ( ubyte8 ) BitReverseTable[seq.high2 & 0xffff] << 48 ) |
+ ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high2 >> 48 ) & 0xffff] );
+
+ if ( seq_size < 64 )
+ {
+ seq.high2 = seq.high2 ^ seq.low2;
+ seq.low2 = seq.high2 ^ seq.low2;
+ seq.high2 = seq.high2 ^ seq.low2;
+ seq = KmerRightBitMove ( seq, 128 - ( seq_size << 1 ) );
+ return seq;
+ }
+
+ seq.low1 ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.low1 = ( ( ubyte8 ) BitReverseTable[seq.low1 & 0xffff] << 48 ) |
+ ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.low1 >> 48 ) & 0xffff] );
+
+ if ( seq_size < 96 )
+ {
+ seq.low1 = seq.low1 ^ seq.low2;
+ seq.low2 = seq.low1 ^ seq.low2;
+ seq.low1 = seq.low1 ^ seq.low2;
+ seq = KmerRightBitMove ( seq, 192 - ( seq_size << 1 ) );
+ return seq;
+ }
+
+ seq.high1 ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.high1 = ( ( ubyte8 ) BitReverseTable[seq.high1 & 0xffff] << 48 ) |
+ ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 16 ) & 0xffff] << 32 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 32 ) & 0xffff] << 16 ) | ( ( ubyte8 ) BitReverseTable[ ( seq.high1 >> 48 ) & 0xffff] );
+ seq.low1 = seq.low1 ^ seq.high2;
+ seq.high2 = seq.low1 ^ seq.high2;
+ seq.low1 = seq.low1 ^ seq.high2;
+ seq.low2 = seq.low2 ^ seq.high1;
+ seq.high1 = seq.low2 ^ seq.high1;
+ seq.low2 = seq.low2 ^ seq.high1;
+ seq = KmerRightBitMove ( seq, 256 - ( seq_size << 1 ) );
+ return seq;
}
Kmer reverseComplementVerbose ( Kmer word, int overlap )
{
- return fastReverseComp ( word, overlap );
+ return fastReverseComp ( word, overlap );
}
Kmer reverseComplement ( Kmer word, int overlap )
{
- return fastReverseComp ( word, overlap );
+ return fastReverseComp ( word, overlap );
}
#else
-void PrintKmer ( FILE * fp, Kmer kmer )
+void PrintKmer ( FILE *fp, Kmer kmer )
{
- fprintf ( fp, "%llx %llx", kmer.high, kmer.low );
+ fprintf ( fp, "%llx %llx", kmer.high, kmer.low );
}
__uint128_t Kmer2int128 ( Kmer seq )
{
- __uint128_t temp;
- temp = seq.high;
- temp <<= 64;
- temp |= seq.low;
- return temp;
+ __uint128_t temp;
+ temp = seq.high;
+ temp <<= 64;
+ temp |= seq.low;
+ return temp;
}
boolean KmerSmaller ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.high < kmer2.high )
- {
- return 1;
- }
- else if ( kmer1.high == kmer2.high )
- {
- if ( kmer1.low < kmer2.low )
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
- else
- {
- return 0;
- }
+ if ( kmer1.high < kmer2.high )
+ {
+ return 1;
+ }
+ else if ( kmer1.high == kmer2.high )
+ {
+ if ( kmer1.low < kmer2.low )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ return 0;
+ }
}
boolean KmerLarger ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.high > kmer2.high )
- {
- return 1;
- }
- else if ( kmer1.high == kmer2.high )
- {
- if ( kmer1.low > kmer2.low )
- {
- return 1;
- }
- else
- {
- return 0;
- }
- }
- else
- {
- return 0;
- }
+ if ( kmer1.high > kmer2.high )
+ {
+ return 1;
+ }
+ else if ( kmer1.high == kmer2.high )
+ {
+ if ( kmer1.low > kmer2.low )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ else
+ {
+ return 0;
+ }
}
boolean KmerEqual ( Kmer kmer1, Kmer kmer2 )
{
- if ( kmer1.high == kmer2.high && kmer1.low == kmer2.low )
- {
- return 1;
- }
- else
- {
- return 0;
- }
+ if ( kmer1.high == kmer2.high && kmer1.low == kmer2.low )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
Kmer KmerAnd ( Kmer kmer1, Kmer kmer2 )
{
- kmer1.high &= kmer2.high;
- kmer1.low &= kmer2.low;
- return kmer1;
+ kmer1.high &= kmer2.high;
+ kmer1.low &= kmer2.low;
+ return kmer1;
}
Kmer KmerLeftBitMoveBy2 ( Kmer word )
{
- ubyte8 temp = word.low >> 62;
- word.high <<= 2;
- word.high |= temp;
- word.low <<= 2;
- return word;
+ ubyte8 temp = word.low >> 62;
+ word.high <<= 2;
+ word.high |= temp;
+ word.low <<= 2;
+ return word;
}
Kmer KmerRightBitMoveBy2 ( Kmer word )
{
- ubyte8 temp = ( word.high & 0x3 ) << 62;
- word.high >>= 2;
- word.low >>= 2;
- word.low |= temp;
- return word;
+ ubyte8 temp = ( word.high & 0x3 ) << 62;
+ word.high >>= 2;
+ word.low >>= 2;
+ word.low |= temp;
+ return word;
}
Kmer KmerPlus ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word.low |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word.low |= ch;
+ return word;
}
Kmer nextKmer ( Kmer prev, char ch )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word = KmerAnd ( word, WORDFILTER );
- word.low |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word = KmerAnd ( word, WORDFILTER );
+ word.low |= ch;
+ return word;
}
Kmer prevKmer ( Kmer next, char ch )
{
- Kmer word = KmerRightBitMoveBy2 ( next );
+ Kmer word = KmerRightBitMoveBy2 ( next );
- if ( 2 * ( overlaplen - 1 ) < 64 )
- {
- word.low |= ( ( ( ubyte8 ) ch ) << 2 * ( overlaplen - 1 ) );
- }
- else
- {
- word.high |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 64 );
- }
+ if ( 2 * ( overlaplen - 1 ) < 64 )
+ {
+ word.low |= ( ( ( ubyte8 ) ch ) << 2 * ( overlaplen - 1 ) );
+ }
+ else
+ {
+ word.high |= ( ( ubyte8 ) ch ) << ( 2 * ( overlaplen - 1 ) - 64 );
+ }
- return word;
+ return word;
}
char lastCharInKmer ( Kmer kmer )
{
- return ( char ) ( kmer.low & 0x3 );
+ return ( char ) ( kmer.low & 0x3 );
}
char firstCharInKmer ( Kmer kmer )
{
- if ( 2 * ( overlaplen - 1 ) < 64 )
- {
- kmer.low >>= 2 * ( overlaplen - 1 );
- return kmer.low; // & 3;
- }
- else
- {
- kmer.high >>= 2 * ( overlaplen - 1 ) - 64;
- return kmer.high; // & 3;
- }
+ if ( 2 * ( overlaplen - 1 ) < 64 )
+ {
+ kmer.low >>= 2 * ( overlaplen - 1 );
+ return kmer.low; // & 3;
+ }
+ else
+ {
+ kmer.high >>= 2 * ( overlaplen - 1 ) - 64;
+ return kmer.high; // & 3;
+ }
}
Kmer createFilter ( int overlaplen )
{
- Kmer word;
- word.high = word.low = 0;
+ Kmer word;
+ word.high = word.low = 0;
- if ( 2 * overlaplen < 64 )
- {
- word.low = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen ) ) - 1;
- }
- else
- {
- word.low = ~word.low;
+ if ( 2 * overlaplen < 64 )
+ {
+ word.low = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen ) ) - 1;
+ }
+ else
+ {
+ word.low = ~word.low;
- if ( 2 * overlaplen > 64 )
- {
- word.high = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 64 ) ) - 1;
- }
- }
+ if ( 2 * overlaplen > 64 )
+ {
+ word.high = ( ( ( ubyte8 ) 1 ) << ( 2 * overlaplen - 64 ) ) - 1;
+ }
+ }
- return word;
+ return word;
}
Kmer KmerRightBitMove ( Kmer word, int dis )
{
- if ( dis < 64 )
- {
- ubyte8 mask = ( ( ( ubyte8 ) 1 ) << dis ) - 1;
- ubyte8 temp = ( word.high & mask ) << ( 64 - dis );
- word.high >>= dis;
- word.low >>= dis;
- word.low |= temp;
- return word;
- }
+ if ( dis < 64 )
+ {
+ ubyte8 mask = ( ( ( ubyte8 ) 1 ) << dis ) - 1;
+ ubyte8 temp = ( word.high & mask ) << ( 64 - dis );
+ word.high >>= dis;
+ word.low >>= dis;
+ word.low |= temp;
+ return word;
+ }
- word.high >>= ( dis - 64 );
- word.low = word.high;
- word.high = 0;
- return word;
+ word.high >>= ( dis - 64 );
+ word.low = word.high;
+ word.high = 0;
+ return word;
}
-void printKmerSeq ( FILE * fp, Kmer kmer )
+void printKmerSeq ( FILE *fp, Kmer kmer )
{
- int i, bit1, bit2;
- char ch;
- char kmerSeq[64];
- bit2 = overlaplen > 32 ? 32 : overlaplen;
- bit1 = overlaplen > 32 ? overlaplen - 32 : 0;
+ int i, bit1, bit2;
+ char ch;
+ char kmerSeq[64];
+ bit2 = overlaplen > 32 ? 32 : overlaplen;
+ bit1 = overlaplen > 32 ? overlaplen - 32 : 0;
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- ch = kmer.high & 0x3;
- kmer.high >>= 2;
- kmerSeq[i] = ch;
- }
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high & 0x3;
+ kmer.high >>= 2;
+ kmerSeq[i] = ch;
+ }
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- ch = kmer.low & 0x3;
- kmer.low >>= 2;
- kmerSeq[i + bit1] = ch;
- }
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low & 0x3;
+ kmer.low >>= 2;
+ kmerSeq[i + bit1] = ch;
+ }
- for ( i = 0; i < overlaplen; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
- }
+ for ( i = 0; i < overlaplen; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
+ }
}
-void print_kmer ( FILE * fp, Kmer kmer, char c )
+void print_kmer ( FILE *fp, Kmer kmer, char c )
{
- fprintf ( fp, "%llx %llx", kmer.high, kmer.low );
- fprintf ( fp, "%c", c );
+ fprintf ( fp, "%llx %llx", kmer.high, kmer.low );
+ fprintf ( fp, "%c", c );
}
-void print_kmer_gz ( gzFile * fp, Kmer kmer, char c )
+void print_kmer_gz ( gzFile *fp, Kmer kmer, char c )
{
- gzprintf ( fp, "%llx %llx", kmer.high, kmer.low );
- gzprintf ( fp, "%c", c );
+ gzprintf ( fp, "%llx %llx", kmer.high, kmer.low );
+ gzprintf ( fp, "%c", c );
}
static Kmer fastReverseComp ( Kmer seq, char seq_size )
{
- seq.low ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.low = ( ( seq.low & 0x3333333333333333LLU ) << 2 ) | ( ( seq.low & 0xCCCCCCCCCCCCCCCCLLU ) >> 2 );
- seq.low = ( ( seq.low & 0x0F0F0F0F0F0F0F0FLLU ) << 4 ) | ( ( seq.low & 0xF0F0F0F0F0F0F0F0LLU ) >> 4 );
- seq.low = ( ( seq.low & 0x00FF00FF00FF00FFLLU ) << 8 ) | ( ( seq.low & 0xFF00FF00FF00FF00LLU ) >> 8 );
- seq.low = ( ( seq.low & 0x0000FFFF0000FFFFLLU ) << 16 ) | ( ( seq.low & 0xFFFF0000FFFF0000LLU ) >> 16 );
- seq.low = ( ( seq.low & 0x00000000FFFFFFFFLLU ) << 32 ) | ( ( seq.low & 0xFFFFFFFF00000000LLU ) >> 32 );
-
- if ( seq_size < 32 )
- {
- seq.low >>= ( 64 - ( seq_size << 1 ) );
- return seq;
- }
-
- seq.high ^= 0xAAAAAAAAAAAAAAAALLU;
- seq.high = ( ( seq.high & 0x3333333333333333LLU ) << 2 ) | ( ( seq.high & 0xCCCCCCCCCCCCCCCCLLU ) >> 2 );
- seq.high = ( ( seq.high & 0x0F0F0F0F0F0F0F0FLLU ) << 4 ) | ( ( seq.high & 0xF0F0F0F0F0F0F0F0LLU ) >> 4 );
- seq.high = ( ( seq.high & 0x00FF00FF00FF00FFLLU ) << 8 ) | ( ( seq.high & 0xFF00FF00FF00FF00LLU ) >> 8 );
- seq.high = ( ( seq.high & 0x0000FFFF0000FFFFLLU ) << 16 ) | ( ( seq.high & 0xFFFF0000FFFF0000LLU ) >> 16 );
- seq.high = ( ( seq.high & 0x00000000FFFFFFFFLLU ) << 32 ) | ( ( seq.high & 0xFFFFFFFF00000000LLU ) >> 32 );
- ubyte8 temp = seq.high;
- seq.high = seq.low;
- seq.low = temp;
- seq = KmerRightBitMove ( seq, 128 - ( seq_size << 1 ) );
- return seq;
+ seq.low ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.low = ( ( seq.low & 0x3333333333333333LLU ) << 2 ) | ( ( seq.low & 0xCCCCCCCCCCCCCCCCLLU ) >> 2 );
+ seq.low = ( ( seq.low & 0x0F0F0F0F0F0F0F0FLLU ) << 4 ) | ( ( seq.low & 0xF0F0F0F0F0F0F0F0LLU ) >> 4 );
+ seq.low = ( ( seq.low & 0x00FF00FF00FF00FFLLU ) << 8 ) | ( ( seq.low & 0xFF00FF00FF00FF00LLU ) >> 8 );
+ seq.low = ( ( seq.low & 0x0000FFFF0000FFFFLLU ) << 16 ) | ( ( seq.low & 0xFFFF0000FFFF0000LLU ) >> 16 );
+ seq.low = ( ( seq.low & 0x00000000FFFFFFFFLLU ) << 32 ) | ( ( seq.low & 0xFFFFFFFF00000000LLU ) >> 32 );
+
+ if ( seq_size < 32 )
+ {
+ seq.low >>= ( 64 - ( seq_size << 1 ) );
+ return seq;
+ }
+
+ seq.high ^= 0xAAAAAAAAAAAAAAAALLU;
+ seq.high = ( ( seq.high & 0x3333333333333333LLU ) << 2 ) | ( ( seq.high & 0xCCCCCCCCCCCCCCCCLLU ) >> 2 );
+ seq.high = ( ( seq.high & 0x0F0F0F0F0F0F0F0FLLU ) << 4 ) | ( ( seq.high & 0xF0F0F0F0F0F0F0F0LLU ) >> 4 );
+ seq.high = ( ( seq.high & 0x00FF00FF00FF00FFLLU ) << 8 ) | ( ( seq.high & 0xFF00FF00FF00FF00LLU ) >> 8 );
+ seq.high = ( ( seq.high & 0x0000FFFF0000FFFFLLU ) << 16 ) | ( ( seq.high & 0xFFFF0000FFFF0000LLU ) >> 16 );
+ seq.high = ( ( seq.high & 0x00000000FFFFFFFFLLU ) << 32 ) | ( ( seq.high & 0xFFFFFFFF00000000LLU ) >> 32 );
+ ubyte8 temp = seq.high;
+ seq.high = seq.low;
+ seq.low = temp;
+ seq = KmerRightBitMove ( seq, 128 - ( seq_size << 1 ) );
+ return seq;
}
Kmer reverseComplementVerbose ( Kmer word, int overlap )
{
- return fastReverseComp ( word, overlap );
+ return fastReverseComp ( word, overlap );
}
Kmer reverseComplement ( Kmer word, int overlap )
{
- return fastReverseComp ( word, overlap );
+ return fastReverseComp ( word, overlap );
}
#endif
diff --git a/standardPregraph/kmerhash.c b/standardPregraph/kmerhash.c
index 8266dd0..2f5285f 100644
--- a/standardPregraph/kmerhash.c
+++ b/standardPregraph/kmerhash.c
@@ -1,7 +1,7 @@
/*
* kmerhash.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -35,27 +35,27 @@
#ifdef MER127
static const kmer_t2 empty_kmer2 = {{0, 0, 0, 0}, 0, 0, 0, 0};
//Get the hash key.
-static inline ubyte8 modular ( KmerSet2 * set, Kmer seq )
+static inline ubyte8 modular ( KmerSet2 *set, Kmer seq )
{
- ubyte8 temp;
- temp = ( seq.high1 % set->size ) << 32 | ( seq.low1 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low1 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.high2 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.high2 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low2 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low2 & 0xffffffff );
- temp = ( ubyte8 ) ( temp % set->size );
- return temp;
+ ubyte8 temp;
+ temp = ( seq.high1 % set->size ) << 32 | ( seq.low1 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low1 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.high2 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.high2 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low2 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low2 & 0xffffffff );
+ temp = ( ubyte8 ) ( temp % set->size );
+ return temp;
}
#else
static const kmer_t2 empty_kmer2 = {{0, 0}, 0, 0, 0, 0};
-static inline ubyte8 modular ( KmerSet2 * set, Kmer seq )
+static inline ubyte8 modular ( KmerSet2 *set, Kmer seq )
{
- ubyte8 hc;
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
- return hc;
+ ubyte8 hc;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
+ return hc;
}
#endif
@@ -73,19 +73,21 @@ Output:
Return:
None.
*************************************************/
-PUBLIC_FUNC void update_kmer2 ( kmer_t2 * mer, int id, char flag )
+PUBLIC_FUNC void update_kmer2 ( kmer_t2 *mer, int id, char flag )
{
- struct edgeID * edgeid;
- edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
- edgeid->edge = id;
- edgeid->flag = flag;
- edgeid->next = NULL;
-
- if ( mer->edgeId )
- { edgeid->next = mer->edgeId; }
-
- mer->edgeId = edgeid;
- mer->count++;
+ struct edgeID *edgeid;
+ edgeid = ( struct edgeID * ) malloc ( sizeof ( struct edgeID ) );
+ edgeid->edge = id;
+ edgeid->flag = flag;
+ edgeid->next = NULL;
+
+ if ( mer->edgeId )
+ {
+ edgeid->next = mer->edgeId;
+ }
+
+ mer->edgeId = edgeid;
+ mer->count++;
}
/*************************************************
@@ -103,35 +105,58 @@ Output:
Return:
None.
*************************************************/
-PUBLIC_FUNC void set_new_kmer2 ( kmer_t2 * mer, Kmer seq, int id, char flag )
+PUBLIC_FUNC void set_new_kmer2 ( kmer_t2 *mer, Kmer seq, int id, char flag )
{
- *mer = empty_kmer2;
- set_kmer_seq ( *mer, seq );
- update_kmer2 ( mer, id, flag );
+ *mer = empty_kmer2;
+ set_kmer_seq ( *mer, seq );
+ update_kmer2 ( mer, id, flag );
}
//Whether it's a prime number.
static inline int is_prime_kh ( ubyte8 num )
{
- ubyte8 i, max;
+ ubyte8 i, max;
- if ( num < 4 ) { return 1; }
+ if ( num < 4 )
+ {
+ return 1;
+ }
- if ( num % 2 == 0 ) { return 0; }
+ if ( num % 2 == 0 )
+ {
+ return 0;
+ }
- max = ( ubyte8 ) sqrt ( ( float ) num );
+ max = ( ubyte8 ) sqrt ( ( float ) num );
- for ( i = 3; i < max; i += 2 ) { if ( num % i == 0 ) { return 0; } }
+ for ( i = 3; i < max; i += 2 )
+ {
+ if ( num % i == 0 )
+ {
+ return 0;
+ }
+ }
- return 1;
+ return 1;
}
//Find next prime number.
static inline ubyte8 find_next_prime_kh ( ubyte8 num )
{
- if ( num % 2 == 0 ) { num ++; }
-
- while ( 1 ) { if ( is_prime_kh ( num ) ) { return num; } num += 2; }
+ if ( num % 2 == 0 )
+ {
+ num ++;
+ }
+
+ while ( 1 )
+ {
+ if ( is_prime_kh ( num ) )
+ {
+ return num;
+ }
+
+ num += 2;
+ }
}
/*************************************************
@@ -147,59 +172,76 @@ Output:
Return:
The initial kmerset.
*************************************************/
-PUBLIC_FUNC KmerSet2 * init_kmerset2 ( ubyte8 init_size, float load_factor )
+PUBLIC_FUNC KmerSet2 *init_kmerset2 ( ubyte8 init_size, float load_factor )
{
- KmerSet2 * set;
-
- if ( init_size < 3 ) { init_size = 3; }
- else { init_size = find_next_prime_kh ( init_size ); }
-
- set = ( KmerSet2 * ) malloc ( sizeof ( KmerSet2 ) );
- set->size = init_size;
- set->count = 0;
- set->max = set->size * load_factor;
-
- if ( load_factor <= 0 ) { load_factor = 0.25f; }
- else if ( load_factor >= 1 ) { load_factor = 0.75f; }
-
- set->load_factor = load_factor;
- set->iter_ptr = 0;
- set->array = calloc ( set->size, sizeof ( kmer_t2 ) );
- set->flags = malloc ( ( set->size + 15 ) / 16 * 4 );
- memset ( set->flags, 0x55, ( set->size + 15 ) / 16 * 4 );
- return set;
+ KmerSet2 *set;
+
+ if ( init_size < 3 )
+ {
+ init_size = 3;
+ }
+ else
+ {
+ init_size = find_next_prime_kh ( init_size );
+ }
+
+ set = ( KmerSet2 * ) malloc ( sizeof ( KmerSet2 ) );
+ set->size = init_size;
+ set->count = 0;
+ set->max = set->size * load_factor;
+
+ if ( load_factor <= 0 )
+ {
+ load_factor = 0.25f;
+ }
+ else if ( load_factor >= 1 )
+ {
+ load_factor = 0.75f;
+ }
+
+ set->load_factor = load_factor;
+ set->iter_ptr = 0;
+ set->array = calloc ( set->size, sizeof ( kmer_t2 ) );
+ set->flags = malloc ( ( set->size + 15 ) / 16 * 4 );
+ memset ( set->flags, 0x55, ( set->size + 15 ) / 16 * 4 );
+ return set;
}
-PROTECTED_FUNC static inline ubyte8 get_kmerset2 ( KmerSet2 * set, Kmer seq )
+PROTECTED_FUNC static inline ubyte8 get_kmerset2 ( KmerSet2 *set, Kmer seq )
{
- ubyte8 hc;
- // hc = modular (set, seq);
+ ubyte8 hc;
+ // hc = modular (set, seq);
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- while ( 1 )
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- return hc;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- { return hc; }
- }
-
- hc ++;
-
- if ( hc == set->size ) { hc = 0; }
- }
-
- return set->size;
+ while ( 1 )
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ return hc;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ return hc;
+ }
+ }
+
+ hc ++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ return set->size;
}
/*************************************************
@@ -216,39 +258,42 @@ Output:
Return:
1 if found.
*************************************************/
-PUBLIC_FUNC int search_kmerset2 ( KmerSet2 * set, Kmer seq, kmer_t2 ** rs )
+PUBLIC_FUNC int search_kmerset2 ( KmerSet2 *set, Kmer seq, kmer_t2 **rs )
{
- ubyte8 hc;
- // hc = modular (set, seq);
+ ubyte8 hc;
+ // hc = modular (set, seq);
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- while ( 1 )
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- return 0;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- {
- *rs = set->array + hc;
- return 1;
- }
- }
-
- hc ++;
-
- if ( hc == set->size ) { hc = 0; }
- }
-
- return 0;
+ while ( 1 )
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ return 0;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ *rs = set->array + hc;
+ return 1;
+ }
+ }
+
+ hc ++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ return 0;
}
/*************************************************
@@ -264,11 +309,11 @@ Output:
Return:
1 if it exists.
*************************************************/
-PUBLIC_FUNC static inline int exists_kmerset ( KmerSet2 * set, Kmer seq )
+PUBLIC_FUNC static inline int exists_kmerset ( KmerSet2 *set, Kmer seq )
{
- ubyte8 idx;
- idx = get_kmerset2 ( set, seq );
- return !is_kmer_entity_null ( set->flags, idx );
+ ubyte8 idx;
+ idx = get_kmerset2 ( set, seq );
+ return !is_kmer_entity_null ( set->flags, idx );
}
/*************************************************
@@ -284,97 +329,115 @@ Output:
Return:
None.
*************************************************/
-PROTECTED_FUNC static inline void encap_kmerset2 ( KmerSet2 * set, ubyte8 num )
+PROTECTED_FUNC static inline void encap_kmerset2 ( KmerSet2 *set, ubyte8 num )
{
- ubyte4 * flags, *f;
- ubyte8 i, n, size, hc;
- kmer_t2 key, tmp;
-
- if ( set->count + num <= set->max ) { return; }
-
- if ( initKmerSetSize != 0 )
- {
- if ( set->load_factor < 0.88 )
- {
- set->load_factor = 0.88;
- set->max = set->size * set->load_factor;
- return;
- }
- else
- {
- fprintf ( stderr, "-- Static memory pool exploded, please define a larger value. --\nloadFactor\t%f\nsize\t%llu\ncnt\t%llu\n", set->load_factor, set->size, set->count );
- abort();
- }
- }
-
- n = set->size;
-
- do
- {
- if ( n < 0xFFFFFFFU )
- { n <<= 1; }
- else
- { n += 0xFFFFFFU; }
-
- n = find_next_prime_kh ( n );
- }
- while ( n * set->load_factor < set->count + num );
-
- set->array = realloc ( set->array, n * sizeof ( kmer_t2 ) );
-
- if ( set->array == NULL )
- {
- fprintf ( stderr, "-- Out of memory --\n" );
- abort();
- }
-
- flags = malloc ( ( n + 15 ) / 16 * 4 );
- memset ( flags, 0x55, ( n + 15 ) / 16 * 4 );
- size = set->size;
- set->size = n;
- set->max = n * set->load_factor;
- f = set->flags;
- set->flags = flags;
- flags = f;
- __uint128_t temp;
-
- for ( i = 0; i < size; i++ )
- {
- if ( !exists_kmer_entity ( flags, i ) ) { continue; }
-
- key = set->array[i];
- set_kmer_entity_del ( flags, i );
-
- while ( 1 )
- {
- hc = modular ( set, get_kmer_seq ( key ) );
+ ubyte4 *flags, *f;
+ ubyte8 i, n, size, hc;
+ kmer_t2 key, tmp;
+
+ if ( set->count + num <= set->max )
+ {
+ return;
+ }
+
+ if ( initKmerSetSize != 0 )
+ {
+ if ( set->load_factor < 0.88 )
+ {
+ set->load_factor = 0.88;
+ set->max = set->size * set->load_factor;
+ return;
+ }
+ else
+ {
+ fprintf ( stderr, "-- Static memory pool exploded, please define a larger value. --\nloadFactor\t%f\nsize\t%llu\ncnt\t%llu\n", set->load_factor, set->size, set->count );
+ abort();
+ }
+ }
+
+ n = set->size;
+
+ do
+ {
+ if ( n < 0xFFFFFFFU )
+ {
+ n <<= 1;
+ }
+ else
+ {
+ n += 0xFFFFFFU;
+ }
+
+ n = find_next_prime_kh ( n );
+ }
+ while ( n * set->load_factor < set->count + num );
+
+ set->array = realloc ( set->array, n * sizeof ( kmer_t2 ) );
+
+ if ( set->array == NULL )
+ {
+ fprintf ( stderr, "-- Out of memory --\n" );
+ abort();
+ }
+
+ flags = malloc ( ( n + 15 ) / 16 * 4 );
+ memset ( flags, 0x55, ( n + 15 ) / 16 * 4 );
+ size = set->size;
+ set->size = n;
+ set->max = n * set->load_factor;
+ f = set->flags;
+ set->flags = flags;
+ flags = f;
+ __uint128_t temp;
+
+ for ( i = 0; i < size; i++ )
+ {
+ if ( !exists_kmer_entity ( flags, i ) )
+ {
+ continue;
+ }
+
+ key = set->array[i];
+ set_kmer_entity_del ( flags, i );
+
+ while ( 1 )
+ {
+ hc = modular ( set, get_kmer_seq ( key ) );
#ifdef MER127
- hc = modular ( set, get_kmer_seq ( key ) );
+ hc = modular ( set, get_kmer_seq ( key ) );
#else
- temp = Kmer2int128 ( get_kmer_seq ( key ) );
- hc = temp % set->size;
+ temp = Kmer2int128 ( get_kmer_seq ( key ) );
+ hc = temp % set->size;
#endif
- while ( !is_kmer_entity_null ( set->flags, hc ) ) { hc ++; if ( hc == set->size ) { hc = 0; } }
-
- clear_kmer_entity_null ( set->flags, hc );
-
- if ( hc < size && exists_kmer_entity ( flags, hc ) )
- {
- tmp = key;
- key = set->array[hc];
- set->array[hc] = tmp;
- set_kmer_entity_del ( flags, hc );
- }
- else
- {
- set->array[hc] = key;
- break;
- }
- }
- }
-
- free ( flags );
+ while ( !is_kmer_entity_null ( set->flags, hc ) )
+ {
+ hc ++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ clear_kmer_entity_null ( set->flags, hc );
+
+ if ( hc < size && exists_kmer_entity ( flags, hc ) )
+ {
+ tmp = key;
+ key = set->array[hc];
+ set->array[hc] = tmp;
+ set_kmer_entity_del ( flags, hc );
+ }
+ else
+ {
+ set->array[hc] = key;
+ break;
+ }
+ }
+ }
+
+ free ( flags );
}
/*************************************************
@@ -393,52 +456,55 @@ Output:
Return:
1 if it's successfully put kmer into kmerset.
*************************************************/
-PUBLIC_FUNC int put_kmerset2 ( KmerSet2 * set, Kmer seq, int id, char flag, kmer_t2 ** kmer_p )
+PUBLIC_FUNC int put_kmerset2 ( KmerSet2 *set, Kmer seq, int id, char flag, kmer_t2 **kmer_p )
{
- ubyte8 hc;
+ ubyte8 hc;
- if ( set->count + 1 > set->max )
- {
- encap_kmerset2 ( set, 1 );
- }
+ if ( set->count + 1 > set->max )
+ {
+ encap_kmerset2 ( set, 1 );
+ }
- // hc = modular (set, seq);
+ // hc = modular (set, seq);
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- do
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- clear_kmer_entity_null ( set->flags, hc );
- set_new_kmer2 ( set->array + hc, seq, id, flag );
- set->count ++;
- *kmer_p = set->array + hc;
- return 0;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- {
- update_kmer2 ( set->array + hc, id, flag );
- *kmer_p = set->array + hc;
- return 1;
- }
- }
-
- hc ++;
-
- if ( hc == set->size ) { hc = 0; }
- }
- while ( 1 );
-
- *kmer_p = NULL;
- return 0;
+ do
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ clear_kmer_entity_null ( set->flags, hc );
+ set_new_kmer2 ( set->array + hc, seq, id, flag );
+ set->count ++;
+ *kmer_p = set->array + hc;
+ return 0;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ update_kmer2 ( set->array + hc, id, flag );
+ *kmer_p = set->array + hc;
+ return 1;
+ }
+ }
+
+ hc ++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+ while ( 1 );
+
+ *kmer_p = NULL;
+ return 0;
}
/*************************************************
@@ -453,63 +519,69 @@ Output:
Return:
The kmer number of the kmerset.
*************************************************/
-PUBLIC_FUNC byte8 count_kmerset2 ( KmerSet2 * set ) { return set->count; }
+PUBLIC_FUNC byte8 count_kmerset2 ( KmerSet2 *set )
+{
+ return set->count;
+}
-PUBLIC_FUNC static inline void reset_iter_kmerset2 ( KmerSet2 * set ) { set->iter_ptr = 0; }
+PUBLIC_FUNC static inline void reset_iter_kmerset2 ( KmerSet2 *set )
+{
+ set->iter_ptr = 0;
+}
-PUBLIC_FUNC static inline ubyte8 iter_kmerset2 ( KmerSet2 * set, kmer_t2 ** rs )
+PUBLIC_FUNC static inline ubyte8 iter_kmerset2 ( KmerSet2 *set, kmer_t2 **rs )
{
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- *rs = set->array + set->iter_ptr;
- set->iter_ptr ++;
- return 1;
- }
-
- set->iter_ptr ++;
- }
-
- return 0;
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ *rs = set->array + set->iter_ptr;
+ set->iter_ptr ++;
+ return 1;
+ }
+
+ set->iter_ptr ++;
+ }
+
+ return 0;
}
//Free.
-PUBLIC_FUNC void free_kmerset2 ( KmerSet2 * set )
+PUBLIC_FUNC void free_kmerset2 ( KmerSet2 *set )
{
- int i;
- struct edgeID * temp, *temp_next;
- kmer_t2 * node;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- node = set->array + set->iter_ptr;
-
- if ( node )
- {
- temp = node->edgeId;
-
- while ( temp )
- {
- temp_next = temp->next;
- free ( ( void * ) temp );
- temp = temp_next;
- }
- }
- }
-
- set->iter_ptr ++;
- }
-
- free ( set->array );
- set->array = NULL;
- free ( set->flags );
- set->flags = NULL;
- free ( set );
- set = NULL;
+ int i;
+ struct edgeID *temp, *temp_next;
+ kmer_t2 *node;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ node = set->array + set->iter_ptr;
+
+ if ( node )
+ {
+ temp = node->edgeId;
+
+ while ( temp )
+ {
+ temp_next = temp->next;
+ free ( ( void * ) temp );
+ temp = temp_next;
+ }
+ }
+ }
+
+ set->iter_ptr ++;
+ }
+
+ free ( set->array );
+ set->array = NULL;
+ free ( set->flags );
+ set->flags = NULL;
+ free ( set );
+ set = NULL;
}
/*************************************************
@@ -525,16 +597,16 @@ Output:
Return:
None.
*************************************************/
-PUBLIC_FUNC void free_Sets2 ( KmerSet2 ** sets, int num )
+PUBLIC_FUNC void free_Sets2 ( KmerSet2 **sets, int num )
{
- int i;
+ int i;
- for ( i = 0; i < num; ++i )
- {
- free_kmerset2 ( sets[i] );
- sets[i] = NULL;
- }
+ for ( i = 0; i < num; ++i )
+ {
+ free_kmerset2 ( sets[i] );
+ sets[i] = NULL;
+ }
- free ( ( void * ) sets );
- sets = NULL;
+ free ( ( void * ) sets );
+ sets = NULL;
}
diff --git a/standardPregraph/lib.c b/standardPregraph/lib.c
index 95e4eb7..ceb09d1 100644
--- a/standardPregraph/lib.c
+++ b/standardPregraph/lib.c
@@ -1,7 +1,7 @@
/*
* lib.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -42,586 +42,590 @@ Return:
*************************************************/
int getMaxLongReadLen ( int num_libs )
{
- int i;
- int maxLong = 0;
- boolean Has = 0;
-
- for ( i = 0; i < num_libs; i++ )
- {
- if ( lib_array[i].asm_flag != 4 )
- {
- continue;
- }
-
- Has = 1;
- maxLong = maxLong < lib_array[i].rd_len_cutoff ? lib_array[i].rd_len_cutoff : maxLong;
- }
-
- if ( !Has )
- {
- return maxLong;
- }
- else
- {
- return maxLong > 0 ? maxLong : maxReadLen;
- }
+ int i;
+ int maxLong = 0;
+ boolean Has = 0;
+
+ for ( i = 0; i < num_libs; i++ )
+ {
+ if ( lib_array[i].asm_flag != 4 )
+ {
+ continue;
+ }
+
+ Has = 1;
+ maxLong = maxLong < lib_array[i].rd_len_cutoff ? lib_array[i].rd_len_cutoff : maxLong;
+ }
+
+ if ( !Has )
+ {
+ return maxLong;
+ }
+ else
+ {
+ return maxLong > 0 ? maxLong : maxReadLen;
+ }
}
-static boolean splitColumn ( char * line )
+static boolean splitColumn ( char *line )
{
- int len = strlen ( line );
- int i = 0, j;
- int tabs_n = 0;
-
- while ( i < len )
- {
- if ( line[i] >= 32 && line[i] <= 126 && line[i] != '=' )
- {
- j = 0;
-
- while ( i < len && line[i] >= 32 && line[i] <= 126 && line[i] != '=' )
- {
- tabs[tabs_n][j++] = line[i];
- i++;
- }
-
- tabs[tabs_n][j] = '\0';
- tabs_n++;
-
- if ( tabs_n == 2 )
- {
- return 1;
- }
- }
-
- i++;
- }
-
- if ( tabs_n == 2 )
- {
- return 1;
- }
- else
- {
- return 0;
- }
+ int len = strlen ( line );
+ int i = 0, j;
+ int tabs_n = 0;
+
+ while ( i < len )
+ {
+ if ( line[i] >= 32 && line[i] <= 126 && line[i] != '=' )
+ {
+ j = 0;
+
+ while ( i < len && line[i] >= 32 && line[i] <= 126 && line[i] != '=' )
+ {
+ tabs[tabs_n][j++] = line[i];
+ i++;
+ }
+
+ tabs[tabs_n][j] = '\0';
+ tabs_n++;
+
+ if ( tabs_n == 2 )
+ {
+ return 1;
+ }
+ }
+
+ i++;
+ }
+
+ if ( tabs_n == 2 )
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
}
-static int cmp_lib ( const void * a, const void * b )
+static int cmp_lib ( const void *a, const void *b )
{
- LIB_INFO * A, *B;
- A = ( LIB_INFO * ) a;
- B = ( LIB_INFO * ) b;
-
- if ( A->avg_ins > B->avg_ins )
- {
- return 1;
- }
- else if ( A->avg_ins == B->avg_ins )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ LIB_INFO *A, *B;
+ A = ( LIB_INFO * ) a;
+ B = ( LIB_INFO * ) b;
+
+ if ( A->avg_ins > B->avg_ins )
+ {
+ return 1;
+ }
+ else if ( A->avg_ins == B->avg_ins )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
-void scan_libInfo ( char * libfile )
+void scan_libInfo ( char *libfile )
{
- FILE * fp;
- char line[1024], ch;
- int i, j, index;
- int libCounter;
- boolean flag;
- boolean * pe;
- fp = ckopen ( libfile, "r" );
- num_libs = 0;
-
- while ( fgets ( line, 1024, fp ) )
- {
- ch = line[5];
- line[5] = '\0';
-
- if ( strcmp ( line, "[LIB]" ) == 0 )
- {
- num_libs++;
- }
-
- if ( !num_libs )
- {
- line[5] = ch;
- flag = splitColumn ( line );
-
- if ( !flag )
- {
- continue;
- }
-
- if ( strcmp ( tabs[0], "max_rd_len" ) == 0 )
- {
- maxReadLen = atoi ( tabs[1] );
- }
- }
- }
-
- if ( num_libs == 0 )
- {
- fprintf ( stderr, "Config file error: no [LIB] in file\n" );
- exit ( -1 );
- }
-
- //count file numbers of each type
- lib_array = ( LIB_INFO * ) ckalloc ( num_libs * sizeof ( LIB_INFO ) );
- pe = ( boolean * ) ckalloc ( num_libs * sizeof ( boolean ) );
-
- for ( i = 0; i < num_libs; i++ )
- {
- lib_array[i].asm_flag = 3;
- lib_array[i].rank = 0;
- lib_array[i].pair_num_cut = 0;
- lib_array[i].rd_len_cutoff = 0;
- lib_array[i].map_len = 0;
- lib_array[i].num_s_a_file = 0;
- lib_array[i].num_s_q_file = 0;
- lib_array[i].num_p_file = 0;
- lib_array[i].num_a1_file = 0;
- lib_array[i].num_a2_file = 0;
- lib_array[i].num_q1_file = 0;
- lib_array[i].num_q2_file = 0;
- lib_array[i].num_b_file = 0; //init
- pe[i] = false;
- }
-
- libCounter = -1;
- rewind ( fp );
- i = -1;
-
- while ( fgets ( line, 1024, fp ) )
- {
- ch = line[5];
- line[5] = '\0';
-
- if ( strcmp ( line, "[LIB]" ) == 0 )
- {
- i++;
- continue;
- }
-
- line[5] = ch;
- flag = splitColumn ( line );
-
- if ( !flag )
- {
- continue;
- }
-
- if ( strcmp ( tabs[0], "f1" ) == 0 )
- {
- lib_array[i].num_a1_file++;
- pe[i] = true;
- }
- else if ( strcmp ( tabs[0], "q1" ) == 0 )
- {
- lib_array[i].num_q1_file++;
- pe[i] = true;
- }
- else if ( strcmp ( tabs[0], "f2" ) == 0 )
- {
- lib_array[i].num_a2_file++;
- pe[i] = true;
- }
- else if ( strcmp ( tabs[0], "q2" ) == 0 )
- {
- lib_array[i].num_q2_file++;
- pe[i] = true;
- }
- else if ( strcmp ( tabs[0], "f" ) == 0 )
- {
- lib_array[i].num_s_a_file++;
- }
- else if ( strcmp ( tabs[0], "q" ) == 0 )
- {
- lib_array[i].num_s_q_file++;
- }
- else if ( strcmp ( tabs[0], "p" ) == 0 )
- {
- lib_array[i].num_p_file++;
- pe[i] = true;
- }
- else if ( strcmp ( tabs[0], "b" ) == 0 ) // the bam file
- {
- lib_array[i].num_b_file++;
- pe[i] = true;
- }
- }
-
- //allocate memory for filenames
- for ( i = 0; i < num_libs; i++ )
- {
- if ( lib_array[i].num_a2_file != lib_array[i].num_a1_file )
- {
- fprintf ( stderr, "Config file error: the number of mark \"f1\" is not the same as \"f2\"!\n" );
- exit ( -1 );
- }
-
- if ( lib_array[i].num_q2_file != lib_array[i].num_q1_file )
- {
- fprintf ( stderr, "Config file error: the number of mark \"q1\" is not the same as \"q2\"!\n" );
- exit ( -1 );
- }
-
- if ( lib_array[i].num_s_a_file )
- {
- lib_array[i].s_a_fname = ( char ** ) ckalloc ( lib_array[i].num_s_a_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_s_a_file; j++ )
- {
- lib_array[i].s_a_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_s_q_file )
- {
- lib_array[i].s_q_fname = ( char ** ) ckalloc ( lib_array[i].num_s_q_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_s_q_file; j++ )
- {
- lib_array[i].s_q_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_p_file )
- {
- lib_array[i].p_fname = ( char ** ) ckalloc ( lib_array[i].num_p_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_p_file; j++ )
- {
- lib_array[i].p_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_a1_file )
- {
- lib_array[i].a1_fname = ( char ** ) ckalloc ( lib_array[i].num_a1_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_a1_file; j++ )
- {
- lib_array[i].a1_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_a2_file )
- {
- lib_array[i].a2_fname = ( char ** ) ckalloc ( lib_array[i].num_a2_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_a2_file; j++ )
- {
- lib_array[i].a2_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_q1_file )
- {
- lib_array[i].q1_fname = ( char ** ) ckalloc ( lib_array[i].num_q1_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_q1_file; j++ )
- {
- lib_array[i].q1_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_q2_file )
- {
- lib_array[i].q2_fname = ( char ** ) ckalloc ( lib_array[i].num_q2_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_q2_file; j++ )
- {
- lib_array[i].q2_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
- }
- }
-
- if ( lib_array[i].num_b_file ) //allot memory for bam file name
- {
- lib_array[i].b_fname = ( char ** ) ckalloc ( lib_array[i].num_b_file * sizeof ( char * ) );
-
- for ( j = 0; j < lib_array[i].num_b_file; j++ )
- { lib_array[i].b_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) ); }
- }
- }
-
- // get file names
- for ( i = 0; i < num_libs; i++ )
- {
- lib_array[i].curr_type = 1;
- lib_array[i].curr_index = 0;
- lib_array[i].fp1 = NULL;
- lib_array[i].fp2 = NULL;
- lib_array[i].num_s_a_file = 0;
- lib_array[i].num_s_q_file = 0;
- lib_array[i].num_p_file = 0;
- lib_array[i].num_a1_file = 0;
- lib_array[i].num_a2_file = 0;
- lib_array[i].num_q1_file = 0;
- lib_array[i].num_q2_file = 0;
- lib_array[i].num_b_file = 0; //init
- lib_array[i].fp3 = NULL;
- }
-
- libCounter = -1;
- rewind ( fp );
- i = -1;
-
- while ( fgets ( line, 1024, fp ) )
- {
- ch = line[5];
- line[5] = '\0';
-
- if ( strcmp ( line, "[LIB]" ) == 0 )
- {
- i++;
- continue;
- }
-
- line[5] = ch;
- flag = splitColumn ( line );
-
- if ( !flag )
- {
- continue;
- }
-
- if ( strcmp ( tabs[0], "f1" ) == 0 )
- {
- index = lib_array[i].num_a1_file++;
- strcpy ( lib_array[i].a1_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "q1" ) == 0 )
- {
- index = lib_array[i].num_q1_file++;
- strcpy ( lib_array[i].q1_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "f2" ) == 0 )
- {
- index = lib_array[i].num_a2_file++;
- strcpy ( lib_array[i].a2_fname[index], tabs[1] );
-
- if ( strcmp ( lib_array[i].a2_fname[index], lib_array[i].a1_fname[index] ) == 0 )
- {
- fprintf ( stderr, "Config file error: f2 file is the same as f1 file\n" );
- fprintf ( stderr, "f1=%s\n", lib_array[i].a1_fname[index] );
- fprintf ( stderr, "f2=%s\n", lib_array[i].a2_fname[index] );
- exit ( -1 );
- }
- }
- else if ( strcmp ( tabs[0], "q2" ) == 0 )
- {
- index = lib_array[i].num_q2_file++;
- strcpy ( lib_array[i].q2_fname[index], tabs[1] );
-
- if ( strcmp ( lib_array[i].q2_fname[index], lib_array[i].q1_fname[index] ) == 0 )
- {
- fprintf ( stderr, "Config file error: q2 file is the same as q1 file\n" );
- fprintf ( stderr, "q1=%s\n", lib_array[i].q1_fname[index] );
- fprintf ( stderr, "q2=%s\n", lib_array[i].q2_fname[index] );
- exit ( -1 );
- }
- }
- else if ( strcmp ( tabs[0], "f" ) == 0 )
- {
- index = lib_array[i].num_s_a_file++;
- strcpy ( lib_array[i].s_a_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "q" ) == 0 )
- {
- index = lib_array[i].num_s_q_file++;
- strcpy ( lib_array[i].s_q_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "p" ) == 0 )
- {
- index = lib_array[i].num_p_file++;
- strcpy ( lib_array[i].p_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "b" ) == 0 )
- {
- //bam file
- index = lib_array[i].num_b_file++;
- strcpy ( lib_array[i].b_fname[index], tabs[1] );
- }
- else if ( strcmp ( tabs[0], "min_ins" ) == 0 )
- {
- lib_array[i].min_ins = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "max_ins" ) == 0 )
- {
- lib_array[i].max_ins = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "avg_ins" ) == 0 )
- {
- lib_array[i].avg_ins = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "rd_len_cutoff" ) == 0 )
- {
- lib_array[i].rd_len_cutoff = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "reverse_seq" ) == 0 )
- {
- lib_array[i].reverse = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "asm_flags" ) == 0 )
- {
- lib_array[i].asm_flag = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "rank" ) == 0 )
- {
- lib_array[i].rank = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "pair_num_cutoff" ) == 0 )
- {
- lib_array[i].pair_num_cut = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "rd_len_cutoff" ) == 0 )
- {
- lib_array[i].rd_len_cutoff = atoi ( tabs[1] );
- }
- else if ( strcmp ( tabs[0], "map_len" ) == 0 )
- {
- lib_array[i].map_len = atoi ( tabs[1] );
- }
- }
-
- for ( i = 0; i < num_libs; i++ )
- {
- if ( pe[i] && lib_array[i].avg_ins == 0 )
- {
- fprintf ( stderr, "Config file error: PE reads need avg_ins in [LIB] %d\n", i + 1 );
- exit ( -1 );
- }
- }
-
- fclose ( fp );
- qsort ( &lib_array[0], num_libs, sizeof ( LIB_INFO ), cmp_lib );
+ FILE *fp;
+ char line[1024], ch;
+ int i, j, index;
+ int libCounter;
+ boolean flag;
+ boolean *pe;
+ fp = ckopen ( libfile, "r" );
+ num_libs = 0;
+
+ while ( fgets ( line, 1024, fp ) )
+ {
+ ch = line[5];
+ line[5] = '\0';
+
+ if ( strcmp ( line, "[LIB]" ) == 0 )
+ {
+ num_libs++;
+ }
+
+ if ( !num_libs )
+ {
+ line[5] = ch;
+ flag = splitColumn ( line );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ if ( strcmp ( tabs[0], "max_rd_len" ) == 0 )
+ {
+ maxReadLen = atoi ( tabs[1] );
+ }
+ }
+ }
+
+ if ( num_libs == 0 )
+ {
+ fprintf ( stderr, "Config file error: no [LIB] in file\n" );
+ exit ( -1 );
+ }
+
+ //count file numbers of each type
+ lib_array = ( LIB_INFO * ) ckalloc ( num_libs * sizeof ( LIB_INFO ) );
+ pe = ( boolean * ) ckalloc ( num_libs * sizeof ( boolean ) );
+
+ for ( i = 0; i < num_libs; i++ )
+ {
+ lib_array[i].asm_flag = 3;
+ lib_array[i].rank = 0;
+ lib_array[i].pair_num_cut = 0;
+ lib_array[i].rd_len_cutoff = 0;
+ lib_array[i].map_len = 0;
+ lib_array[i].num_s_a_file = 0;
+ lib_array[i].num_s_q_file = 0;
+ lib_array[i].num_p_file = 0;
+ lib_array[i].num_a1_file = 0;
+ lib_array[i].num_a2_file = 0;
+ lib_array[i].num_q1_file = 0;
+ lib_array[i].num_q2_file = 0;
+ lib_array[i].num_b_file = 0; //init
+ pe[i] = false;
+ }
+
+ libCounter = -1;
+ rewind ( fp );
+ i = -1;
+
+ while ( fgets ( line, 1024, fp ) )
+ {
+ ch = line[5];
+ line[5] = '\0';
+
+ if ( strcmp ( line, "[LIB]" ) == 0 )
+ {
+ i++;
+ continue;
+ }
+
+ line[5] = ch;
+ flag = splitColumn ( line );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ if ( strcmp ( tabs[0], "f1" ) == 0 )
+ {
+ lib_array[i].num_a1_file++;
+ pe[i] = true;
+ }
+ else if ( strcmp ( tabs[0], "q1" ) == 0 )
+ {
+ lib_array[i].num_q1_file++;
+ pe[i] = true;
+ }
+ else if ( strcmp ( tabs[0], "f2" ) == 0 )
+ {
+ lib_array[i].num_a2_file++;
+ pe[i] = true;
+ }
+ else if ( strcmp ( tabs[0], "q2" ) == 0 )
+ {
+ lib_array[i].num_q2_file++;
+ pe[i] = true;
+ }
+ else if ( strcmp ( tabs[0], "f" ) == 0 )
+ {
+ lib_array[i].num_s_a_file++;
+ }
+ else if ( strcmp ( tabs[0], "q" ) == 0 )
+ {
+ lib_array[i].num_s_q_file++;
+ }
+ else if ( strcmp ( tabs[0], "p" ) == 0 )
+ {
+ lib_array[i].num_p_file++;
+ pe[i] = true;
+ }
+ else if ( strcmp ( tabs[0], "b" ) == 0 ) // the bam file
+ {
+ lib_array[i].num_b_file++;
+ pe[i] = true;
+ }
+ }
+
+ //allocate memory for filenames
+ for ( i = 0; i < num_libs; i++ )
+ {
+ if ( lib_array[i].num_a2_file != lib_array[i].num_a1_file )
+ {
+ fprintf ( stderr, "Config file error: the number of mark \"f1\" is not the same as \"f2\"!\n" );
+ exit ( -1 );
+ }
+
+ if ( lib_array[i].num_q2_file != lib_array[i].num_q1_file )
+ {
+ fprintf ( stderr, "Config file error: the number of mark \"q1\" is not the same as \"q2\"!\n" );
+ exit ( -1 );
+ }
+
+ if ( lib_array[i].num_s_a_file )
+ {
+ lib_array[i].s_a_fname = ( char ** ) ckalloc ( lib_array[i].num_s_a_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_s_a_file; j++ )
+ {
+ lib_array[i].s_a_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_s_q_file )
+ {
+ lib_array[i].s_q_fname = ( char ** ) ckalloc ( lib_array[i].num_s_q_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_s_q_file; j++ )
+ {
+ lib_array[i].s_q_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_p_file )
+ {
+ lib_array[i].p_fname = ( char ** ) ckalloc ( lib_array[i].num_p_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_p_file; j++ )
+ {
+ lib_array[i].p_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_a1_file )
+ {
+ lib_array[i].a1_fname = ( char ** ) ckalloc ( lib_array[i].num_a1_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_a1_file; j++ )
+ {
+ lib_array[i].a1_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_a2_file )
+ {
+ lib_array[i].a2_fname = ( char ** ) ckalloc ( lib_array[i].num_a2_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_a2_file; j++ )
+ {
+ lib_array[i].a2_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_q1_file )
+ {
+ lib_array[i].q1_fname = ( char ** ) ckalloc ( lib_array[i].num_q1_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_q1_file; j++ )
+ {
+ lib_array[i].q1_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_q2_file )
+ {
+ lib_array[i].q2_fname = ( char ** ) ckalloc ( lib_array[i].num_q2_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_q2_file; j++ )
+ {
+ lib_array[i].q2_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+
+ if ( lib_array[i].num_b_file ) //allot memory for bam file name
+ {
+ lib_array[i].b_fname = ( char ** ) ckalloc ( lib_array[i].num_b_file * sizeof ( char * ) );
+
+ for ( j = 0; j < lib_array[i].num_b_file; j++ )
+ {
+ lib_array[i].b_fname[j] = ( char * ) ckalloc ( 1024 * sizeof ( char ) );
+ }
+ }
+ }
+
+ // get file names
+ for ( i = 0; i < num_libs; i++ )
+ {
+ lib_array[i].curr_type = 1;
+ lib_array[i].curr_index = 0;
+ lib_array[i].fp1 = NULL;
+ lib_array[i].fp2 = NULL;
+ lib_array[i].num_s_a_file = 0;
+ lib_array[i].num_s_q_file = 0;
+ lib_array[i].num_p_file = 0;
+ lib_array[i].num_a1_file = 0;
+ lib_array[i].num_a2_file = 0;
+ lib_array[i].num_q1_file = 0;
+ lib_array[i].num_q2_file = 0;
+ lib_array[i].num_b_file = 0; //init
+ lib_array[i].fp3 = NULL;
+ }
+
+ libCounter = -1;
+ rewind ( fp );
+ i = -1;
+
+ while ( fgets ( line, 1024, fp ) )
+ {
+ ch = line[5];
+ line[5] = '\0';
+
+ if ( strcmp ( line, "[LIB]" ) == 0 )
+ {
+ i++;
+ continue;
+ }
+
+ line[5] = ch;
+ flag = splitColumn ( line );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ if ( strcmp ( tabs[0], "f1" ) == 0 )
+ {
+ index = lib_array[i].num_a1_file++;
+ strcpy ( lib_array[i].a1_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "q1" ) == 0 )
+ {
+ index = lib_array[i].num_q1_file++;
+ strcpy ( lib_array[i].q1_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "f2" ) == 0 )
+ {
+ index = lib_array[i].num_a2_file++;
+ strcpy ( lib_array[i].a2_fname[index], tabs[1] );
+
+ if ( strcmp ( lib_array[i].a2_fname[index], lib_array[i].a1_fname[index] ) == 0 )
+ {
+ fprintf ( stderr, "Config file error: f2 file is the same as f1 file\n" );
+ fprintf ( stderr, "f1=%s\n", lib_array[i].a1_fname[index] );
+ fprintf ( stderr, "f2=%s\n", lib_array[i].a2_fname[index] );
+ exit ( -1 );
+ }
+ }
+ else if ( strcmp ( tabs[0], "q2" ) == 0 )
+ {
+ index = lib_array[i].num_q2_file++;
+ strcpy ( lib_array[i].q2_fname[index], tabs[1] );
+
+ if ( strcmp ( lib_array[i].q2_fname[index], lib_array[i].q1_fname[index] ) == 0 )
+ {
+ fprintf ( stderr, "Config file error: q2 file is the same as q1 file\n" );
+ fprintf ( stderr, "q1=%s\n", lib_array[i].q1_fname[index] );
+ fprintf ( stderr, "q2=%s\n", lib_array[i].q2_fname[index] );
+ exit ( -1 );
+ }
+ }
+ else if ( strcmp ( tabs[0], "f" ) == 0 )
+ {
+ index = lib_array[i].num_s_a_file++;
+ strcpy ( lib_array[i].s_a_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "q" ) == 0 )
+ {
+ index = lib_array[i].num_s_q_file++;
+ strcpy ( lib_array[i].s_q_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "p" ) == 0 )
+ {
+ index = lib_array[i].num_p_file++;
+ strcpy ( lib_array[i].p_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "b" ) == 0 )
+ {
+ //bam file
+ index = lib_array[i].num_b_file++;
+ strcpy ( lib_array[i].b_fname[index], tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "min_ins" ) == 0 )
+ {
+ lib_array[i].min_ins = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "max_ins" ) == 0 )
+ {
+ lib_array[i].max_ins = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "avg_ins" ) == 0 )
+ {
+ lib_array[i].avg_ins = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "rd_len_cutoff" ) == 0 )
+ {
+ lib_array[i].rd_len_cutoff = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "reverse_seq" ) == 0 )
+ {
+ lib_array[i].reverse = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "asm_flags" ) == 0 )
+ {
+ lib_array[i].asm_flag = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "rank" ) == 0 )
+ {
+ lib_array[i].rank = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "pair_num_cutoff" ) == 0 )
+ {
+ lib_array[i].pair_num_cut = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "rd_len_cutoff" ) == 0 )
+ {
+ lib_array[i].rd_len_cutoff = atoi ( tabs[1] );
+ }
+ else if ( strcmp ( tabs[0], "map_len" ) == 0 )
+ {
+ lib_array[i].map_len = atoi ( tabs[1] );
+ }
+ }
+
+ for ( i = 0; i < num_libs; i++ )
+ {
+ if ( pe[i] && lib_array[i].avg_ins == 0 )
+ {
+ fprintf ( stderr, "Config file error: PE reads need avg_ins in [LIB] %d\n", i + 1 );
+ exit ( -1 );
+ }
+ }
+
+ fclose ( fp );
+ qsort ( &lib_array[0], num_libs, sizeof ( LIB_INFO ), cmp_lib );
}
void free_libs ()
{
- if ( !lib_array )
- {
- return;
- }
-
- int i, j;
- fprintf ( stderr, "LIB(s) information:\n" );
-
- for ( i = 0; i < num_libs; i++ )
- {
- fprintf ( stderr, " [LIB] %d, avg_ins %d, reverse %d.\n", i, lib_array[i].avg_ins, lib_array[i].reverse );
-
- if ( lib_array[i].num_s_a_file )
- {
- //printf("%d single fasta files\n",lib_array[i].num_s_a_file);
- for ( j = 0; j < lib_array[i].num_s_a_file; j++ )
- {
- free ( ( void * ) lib_array[i].s_a_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].s_a_fname );
- }
-
- if ( lib_array[i].num_s_q_file )
- {
- //printf("%d single fastq files\n",lib_array[i].num_s_q_file);
- for ( j = 0; j < lib_array[i].num_s_q_file; j++ )
- {
- free ( ( void * ) lib_array[i].s_q_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].s_q_fname );
- }
-
- if ( lib_array[i].num_p_file )
- {
- //printf("%d paired fasta files\n",lib_array[i].num_p_file);
- for ( j = 0; j < lib_array[i].num_p_file; j++ )
- {
- free ( ( void * ) lib_array[i].p_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].p_fname );
- }
-
- if ( lib_array[i].num_a1_file )
- {
- //printf("%d read1 fasta files\n",lib_array[i].num_a1_file);
- for ( j = 0; j < lib_array[i].num_a1_file; j++ )
- {
- free ( ( void * ) lib_array[i].a1_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].a1_fname );
- }
-
- if ( lib_array[i].num_a2_file )
- {
- //printf("%d read2 fasta files\n",lib_array[i].num_a2_file);
- for ( j = 0; j < lib_array[i].num_a2_file; j++ )
- {
- free ( ( void * ) lib_array[i].a2_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].a2_fname );
- }
-
- if ( lib_array[i].num_q1_file )
- {
- //printf("%d read1 fastq files\n",lib_array[i].num_q1_file);
- for ( j = 0; j < lib_array[i].num_q1_file; j++ )
- {
- free ( ( void * ) lib_array[i].q1_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].q1_fname );
- }
-
- if ( lib_array[i].num_q2_file )
- {
- //printf("%d read2 fastq files\n",lib_array[i].num_q2_file);
- for ( j = 0; j < lib_array[i].num_q2_file; j++ )
- {
- free ( ( void * ) lib_array[i].q2_fname[j] );
- }
-
- free ( ( void * ) lib_array[i].q2_fname );
- }
-
- if ( lib_array[i].num_b_file )
- {
- //free the bam file name
- //printf("%d bam files\n",lib_array[i].num_b_file);
- for ( j = 0; j < lib_array[i].num_b_file; j++ )
- { free ( ( void * ) lib_array[i].b_fname[j] ); }
-
- free ( ( void * ) lib_array[i].b_fname );
- }
- }
-
- num_libs = 0;
- free ( ( void * ) lib_array );
+ if ( !lib_array )
+ {
+ return;
+ }
+
+ int i, j;
+ fprintf ( stderr, "LIB(s) information:\n" );
+
+ for ( i = 0; i < num_libs; i++ )
+ {
+ fprintf ( stderr, " [LIB] %d, avg_ins %d, reverse %d.\n", i, lib_array[i].avg_ins, lib_array[i].reverse );
+
+ if ( lib_array[i].num_s_a_file )
+ {
+ //printf("%d single fasta files\n",lib_array[i].num_s_a_file);
+ for ( j = 0; j < lib_array[i].num_s_a_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].s_a_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].s_a_fname );
+ }
+
+ if ( lib_array[i].num_s_q_file )
+ {
+ //printf("%d single fastq files\n",lib_array[i].num_s_q_file);
+ for ( j = 0; j < lib_array[i].num_s_q_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].s_q_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].s_q_fname );
+ }
+
+ if ( lib_array[i].num_p_file )
+ {
+ //printf("%d paired fasta files\n",lib_array[i].num_p_file);
+ for ( j = 0; j < lib_array[i].num_p_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].p_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].p_fname );
+ }
+
+ if ( lib_array[i].num_a1_file )
+ {
+ //printf("%d read1 fasta files\n",lib_array[i].num_a1_file);
+ for ( j = 0; j < lib_array[i].num_a1_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].a1_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].a1_fname );
+ }
+
+ if ( lib_array[i].num_a2_file )
+ {
+ //printf("%d read2 fasta files\n",lib_array[i].num_a2_file);
+ for ( j = 0; j < lib_array[i].num_a2_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].a2_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].a2_fname );
+ }
+
+ if ( lib_array[i].num_q1_file )
+ {
+ //printf("%d read1 fastq files\n",lib_array[i].num_q1_file);
+ for ( j = 0; j < lib_array[i].num_q1_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].q1_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].q1_fname );
+ }
+
+ if ( lib_array[i].num_q2_file )
+ {
+ //printf("%d read2 fastq files\n",lib_array[i].num_q2_file);
+ for ( j = 0; j < lib_array[i].num_q2_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].q2_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].q2_fname );
+ }
+
+ if ( lib_array[i].num_b_file )
+ {
+ //free the bam file name
+ //printf("%d bam files\n",lib_array[i].num_b_file);
+ for ( j = 0; j < lib_array[i].num_b_file; j++ )
+ {
+ free ( ( void * ) lib_array[i].b_fname[j] );
+ }
+
+ free ( ( void * ) lib_array[i].b_fname );
+ }
+ }
+
+ num_libs = 0;
+ free ( ( void * ) lib_array );
}
void alloc_pe_mem ( int gradsCounter )
{
- if ( gradsCounter )
- {
- pes = ( PE_INFO * ) ckalloc ( gradsCounter * sizeof ( PE_INFO ) );
- }
+ if ( gradsCounter )
+ {
+ pes = ( PE_INFO * ) ckalloc ( gradsCounter * sizeof ( PE_INFO ) );
+ }
}
void free_pe_mem ()
{
- if ( pes )
- {
- free ( ( void * ) pes );
- pes = NULL;
- }
+ if ( pes )
+ {
+ free ( ( void * ) pes );
+ pes = NULL;
+ }
}
diff --git a/standardPregraph/linearEdge.c b/standardPregraph/linearEdge.c
index 95772e9..2573aa3 100644
--- a/standardPregraph/linearEdge.c
+++ b/standardPregraph/linearEdge.c
@@ -1,7 +1,7 @@
/*
* linearEdge.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -30,40 +30,40 @@
//Get the char in Kmer.
char getCharInKmer ( Kmer kmer, int pos )
{
- if ( 2 * pos < 64 )
- {
- kmer.low2 >>= 2 * pos;
- return kmer.low2 & 3;
- }
- else if ( 2 * pos < 128 )
- {
- kmer.high2 >>= 2 * pos - 64;
- return kmer.high2 & 3;
- }
- else if ( 2 * pos < 192 )
- {
- kmer.low1 >>= 2 * pos - 128;
- return kmer.low1 & 3;
- }
- else
- {
- kmer.high1 >>= 2 * pos - 192;
- return kmer.high1 & 3;
- }
+ if ( 2 * pos < 64 )
+ {
+ kmer.low2 >>= 2 * pos;
+ return kmer.low2 & 3;
+ }
+ else if ( 2 * pos < 128 )
+ {
+ kmer.high2 >>= 2 * pos - 64;
+ return kmer.high2 & 3;
+ }
+ else if ( 2 * pos < 192 )
+ {
+ kmer.low1 >>= 2 * pos - 128;
+ return kmer.low1 & 3;
+ }
+ else
+ {
+ kmer.high1 >>= 2 * pos - 192;
+ return kmer.high1 & 3;
+ }
}
#else
char getCharInKmer ( Kmer kmer, int pos )
{
- if ( 2 * pos < 64 )
- {
- kmer.low >>= 2 * pos;
- return kmer.low & 3;
- }
- else
- {
- kmer.high >>= 2 * pos - 64;
- return kmer.high & 3;
- }
+ if ( 2 * pos < 64 )
+ {
+ kmer.low >>= 2 * pos;
+ return kmer.low & 3;
+ }
+ else
+ {
+ kmer.high >>= 2 * pos - 64;
+ return kmer.high & 3;
+ }
}
#endif
@@ -82,17 +82,17 @@ Output:
Return:
None.
*************************************************/
-void copyinter ( char * targetS, Kmer sourceS, int pos, int length )
+void copyinter ( char *targetS, Kmer sourceS, int pos, int length )
{
- char ch;
- int i, index;
- index = pos;
-
- for ( i = 0; i < length; ++i )
- {
- ch = getCharInKmer ( sourceS, step - i - 1 );
- writeChar2tightString ( ch, targetS, index++ );
- }
+ char ch;
+ int i, index;
+ index = pos;
+
+ for ( i = 0; i < length; ++i )
+ {
+ ch = getCharInKmer ( sourceS, step - i - 1 );
+ writeChar2tightString ( ch, targetS, index++ );
+ }
}
/*************************************************
@@ -110,17 +110,17 @@ Output:
Return:
None.
*************************************************/
-void copySeq2 ( char * targetS, char * sourceS, int pos, int length )
+void copySeq2 ( char *targetS, char *sourceS, int pos, int length )
{
- char ch;
- int i, index;
- index = pos;
-
- for ( i = 0; i < length; ++i )
- {
- ch = getCharInTightString ( sourceS, i );
- writeChar2tightString ( ch, targetS, index++ );
- }
+ char ch;
+ int i, index;
+ index = pos;
+
+ for ( i = 0; i < length; ++i )
+ {
+ ch = getCharInTightString ( sourceS, i );
+ writeChar2tightString ( ch, targetS, index++ );
+ }
}
/*************************************************
@@ -138,22 +138,24 @@ Return:
*************************************************/
int checkstep ( unsigned int to_vt, unsigned int from_vt )
{
- Kmer to, from;
- Kmer filtertemp;
- to = vt_arraynew[to_vt].kmer;
- from = vt_arraynew[from_vt].kmer;
- int i = 1;
- filtertemp = createFilter ( overlaplen - i );
-
- if ( KmerEqual ( KmerRightBitMove ( from, i << 1 ), KmerAnd ( to, filtertemp ) ) )
- { return i; }
-
- fprintf ( stderr, "When checking step of two edge, step is not found and step is changed to %d, 'to kmer' is ", step );
- printKmerSeq ( stderr, to );
- fprintf ( stderr, " , 'from kmer' is " );
- printKmerSeq ( stderr, from );
- fprintf ( stderr, " .\n" );
- return step;
+ Kmer to, from;
+ Kmer filtertemp;
+ to = vt_arraynew[to_vt].kmer;
+ from = vt_arraynew[from_vt].kmer;
+ int i = 1;
+ filtertemp = createFilter ( overlaplen - i );
+
+ if ( KmerEqual ( KmerRightBitMove ( from, i << 1 ), KmerAnd ( to, filtertemp ) ) )
+ {
+ return i;
+ }
+
+ fprintf ( stderr, "When checking step of two edge, step is not found and step is changed to %d, 'to kmer' is ", step );
+ printKmerSeq ( stderr, to );
+ fprintf ( stderr, " , 'from kmer' is " );
+ printKmerSeq ( stderr, from );
+ fprintf ( stderr, " .\n" );
+ return step;
}
/*************************************************
@@ -172,59 +174,63 @@ Return:
*************************************************/
void linearUpdateConnection2 ( unsigned int e1, unsigned int e2, int indicate )
{
- unsigned int bal_ed;
- ARC * parc;
-
- //caution: length and seq
- if ( !indicate )
- {
- // edge_array[e1].to_vt = edge_array[e2].to_vt;
- bal_ed = getTwinEdge ( e1 );
- parc = edge_array[e2].arcs;
-
- while ( parc )
- {
- parc->bal_arc->to_ed = bal_ed;
- parc = parc->next;
- }
-
- edge_array[e1].arcs = edge_array[e2].arcs;
- edge_array[e2].arcs = NULL;
-
- if ( edge_array[e1].length < 0 || edge_array[e2].length < 0 )
- { fprintf ( stderr, "Error: length < 0.\n" ); }
-
- if ( ( edge_array[e1].length || edge_array[e2].length ) && ( edge_array[e1].length + edge_array[e2].length + step > 0 ) )
- edge_array[e1].cvg = ( edge_array[e1].cvg * ( edge_array[e1].length + step )
- + edge_array[e2].cvg * ( edge_array[e2].length + step ) )
- / ( edge_array[e1].length + edge_array[e2].length + step );
-
- edge_array[e2].deleted = 1;
- }
- else
- {
- //all the arcs pointing to e1 switched to e2
- parc = edge_array[getTwinEdge ( e1 )].arcs;
-
- while ( parc )
- {
- parc->bal_arc->to_ed = e2;
- parc = parc->next;
- }
-
- edge_array[e1].arcs = NULL;
-
- // edge_array[e2].from_vt = edge_array[e1].from_vt;
- if ( edge_array[e1].length < 0 || edge_array[e2].length < 0 )
- { fprintf ( stderr, "Error: length < 0.\n" ); }
-
- if ( ( edge_array[e1].length || edge_array[e2].length ) && ( edge_array[e1].length + edge_array[e2].length + step > 0 ) )
- edge_array[e2].cvg = ( edge_array[e1].cvg * ( edge_array[e1].length + step )
- + edge_array[e2].cvg * ( edge_array[e2].length + step ) )
- / ( edge_array[e1].length + edge_array[e2].length + step );
-
- edge_array[e1].deleted = 1;
- }
+ unsigned int bal_ed;
+ ARC *parc;
+
+ //caution: length and seq
+ if ( !indicate )
+ {
+ // edge_array[e1].to_vt = edge_array[e2].to_vt;
+ bal_ed = getTwinEdge ( e1 );
+ parc = edge_array[e2].arcs;
+
+ while ( parc )
+ {
+ parc->bal_arc->to_ed = bal_ed;
+ parc = parc->next;
+ }
+
+ edge_array[e1].arcs = edge_array[e2].arcs;
+ edge_array[e2].arcs = NULL;
+
+ if ( edge_array[e1].length < 0 || edge_array[e2].length < 0 )
+ {
+ fprintf ( stderr, "Error: length < 0.\n" );
+ }
+
+ if ( ( edge_array[e1].length || edge_array[e2].length ) && ( edge_array[e1].length + edge_array[e2].length + step > 0 ) )
+ edge_array[e1].cvg = ( edge_array[e1].cvg * ( edge_array[e1].length + step )
+ + edge_array[e2].cvg * ( edge_array[e2].length + step ) )
+ / ( edge_array[e1].length + edge_array[e2].length + step );
+
+ edge_array[e2].deleted = 1;
+ }
+ else
+ {
+ //all the arcs pointing to e1 switched to e2
+ parc = edge_array[getTwinEdge ( e1 )].arcs;
+
+ while ( parc )
+ {
+ parc->bal_arc->to_ed = e2;
+ parc = parc->next;
+ }
+
+ edge_array[e1].arcs = NULL;
+
+ // edge_array[e2].from_vt = edge_array[e1].from_vt;
+ if ( edge_array[e1].length < 0 || edge_array[e2].length < 0 )
+ {
+ fprintf ( stderr, "Error: length < 0.\n" );
+ }
+
+ if ( ( edge_array[e1].length || edge_array[e2].length ) && ( edge_array[e1].length + edge_array[e2].length + step > 0 ) )
+ edge_array[e2].cvg = ( edge_array[e1].cvg * ( edge_array[e1].length + step )
+ + edge_array[e2].cvg * ( edge_array[e2].length + step ) )
+ / ( edge_array[e1].length + edge_array[e2].length + step );
+
+ edge_array[e1].deleted = 1;
+ }
}
/*************************************************
@@ -244,107 +250,127 @@ Return:
*************************************************/
void allpathUpdateEdge2 ( unsigned int e1, unsigned int e2, int indicate, boolean last )
{
- int tightLen;
- char * tightSeq = NULL;
- int tempstep = 0;
-
- //caution: length and seq
- if ( edge_array[e1].cvg == 0 )
- { edge_array[e1].cvg = edge_array[e2].cvg; }
-
- if ( edge_array[e2].cvg == 0 )
- { edge_array[e2].cvg = edge_array[e1].cvg; }
-
- unsigned int cvgsum =
- edge_array[e1].cvg * ( edge_array[e1].length + step )
- + edge_array[e2].cvg * ( edge_array[e2].length + step );
- tightLen = edge_array[e1].length + edge_array[e2].length + step;
-
- if ( tightLen )
- { tightSeq = ( char * ) ckalloc ( ( tightLen / 4 + 1 ) * sizeof ( char ) ); }
-
- tightLen = 0;
-
- if ( edge_array[e1].length )
- {
- copySeq2 ( tightSeq, edge_array[e1].seq, 0, edge_array[e1].length );
- tightLen = edge_array[e1].length;
-
- if ( edge_array[e1].seq )
- {
- free ( ( void * ) edge_array[e1].seq );
- edge_array[e1].seq = NULL;
- }
- else
- { fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e1, edge_array[e1].length ); }
- }
-
- {
- if ( step > 0 )
- {
- tempstep = checkstep ( edge_array[e1].to_vt, edge_array[e2].from_vt );
- copyinter ( tightSeq, vt_arraynew[edge_array[e2].from_vt].kmer, tightLen, tempstep );
- tightLen += tempstep;
- }
- }
-
- if ( edge_array[e2].length )
- {
- copySeq2 ( tightSeq, edge_array[e2].seq, tightLen, edge_array[e2].length );
- tightLen += edge_array[e2].length;
-
- if ( edge_array[e2].seq )
- {
- free ( ( void * ) edge_array[e2].seq );
- edge_array[e2].seq = NULL;
- }
- else
- { fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e2, edge_array[e2].length ); }
- }
-
- //edge_array[e2].extend_len = tightLen-edge_array[e2].length;
- //the sequence of e1 is to be updated
- if ( !indicate )
- {
- edge_array[e2].length = 0; //e2 is removed from the graph
- edge_array[e1].to_vt = edge_array[e2].to_vt; //e2 is part of e1 now
- edge_array[e1].length = tightLen;
- edge_array[e1].seq = tightSeq;
-
- if ( tightLen )
- { edge_array[e1].cvg = cvgsum / tightLen; }
-
- if ( last )
- { edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1; }
- }
- else
- {
- edge_array[e1].length = 0; //e1 is removed from the graph
- edge_array[e2].from_vt = edge_array[e1].from_vt; //e1 is part of e2 now
- edge_array[e2].length = tightLen;
- edge_array[e2].seq = tightSeq;
-
- if ( tightLen )
- { edge_array[e2].cvg = cvgsum / tightLen; }
-
- if ( last )
- { edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1; }
- }
+ int tightLen;
+ char *tightSeq = NULL;
+ int tempstep = 0;
+
+ //caution: length and seq
+ if ( edge_array[e1].cvg == 0 )
+ {
+ edge_array[e1].cvg = edge_array[e2].cvg;
+ }
+
+ if ( edge_array[e2].cvg == 0 )
+ {
+ edge_array[e2].cvg = edge_array[e1].cvg;
+ }
+
+ unsigned int cvgsum =
+ edge_array[e1].cvg * ( edge_array[e1].length + step )
+ + edge_array[e2].cvg * ( edge_array[e2].length + step );
+ tightLen = edge_array[e1].length + edge_array[e2].length + step;
+
+ if ( tightLen )
+ {
+ tightSeq = ( char * ) ckalloc ( ( tightLen / 4 + 1 ) * sizeof ( char ) );
+ }
+
+ tightLen = 0;
+
+ if ( edge_array[e1].length )
+ {
+ copySeq2 ( tightSeq, edge_array[e1].seq, 0, edge_array[e1].length );
+ tightLen = edge_array[e1].length;
+
+ if ( edge_array[e1].seq )
+ {
+ free ( ( void * ) edge_array[e1].seq );
+ edge_array[e1].seq = NULL;
+ }
+ else
+ {
+ fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e1, edge_array[e1].length );
+ }
+ }
+
+ {
+ if ( step > 0 )
+ {
+ tempstep = checkstep ( edge_array[e1].to_vt, edge_array[e2].from_vt );
+ copyinter ( tightSeq, vt_arraynew[edge_array[e2].from_vt].kmer, tightLen, tempstep );
+ tightLen += tempstep;
+ }
+ }
+
+ if ( edge_array[e2].length )
+ {
+ copySeq2 ( tightSeq, edge_array[e2].seq, tightLen, edge_array[e2].length );
+ tightLen += edge_array[e2].length;
+
+ if ( edge_array[e2].seq )
+ {
+ free ( ( void * ) edge_array[e2].seq );
+ edge_array[e2].seq = NULL;
+ }
+ else
+ {
+ fprintf ( stderr, "AllpathUpdateEdge: edge %d with length %d, but without seq.\n", e2, edge_array[e2].length );
+ }
+ }
+
+ //edge_array[e2].extend_len = tightLen-edge_array[e2].length;
+ //the sequence of e1 is to be updated
+ if ( !indicate )
+ {
+ edge_array[e2].length = 0; //e2 is removed from the graph
+ edge_array[e1].to_vt = edge_array[e2].to_vt; //e2 is part of e1 now
+ edge_array[e1].length = tightLen;
+ edge_array[e1].seq = tightSeq;
+
+ if ( tightLen )
+ {
+ edge_array[e1].cvg = cvgsum / tightLen;
+ }
+
+ if ( last )
+ {
+ edge_array[e1].cvg = edge_array[e1].cvg > 0 ? edge_array[e1].cvg : 1;
+ }
+ }
+ else
+ {
+ edge_array[e1].length = 0; //e1 is removed from the graph
+ edge_array[e2].from_vt = edge_array[e1].from_vt; //e1 is part of e2 now
+ edge_array[e2].length = tightLen;
+ edge_array[e2].seq = tightSeq;
+
+ if ( tightLen )
+ {
+ edge_array[e2].cvg = cvgsum / tightLen;
+ }
+
+ if ( last )
+ {
+ edge_array[e2].cvg = edge_array[e2].cvg > 0 ? edge_array[e2].cvg : 1;
+ }
+ }
}
static void debugging ( unsigned int i )
{
- ARC * parc;
- parc = edge_array[i].arcs;
-
- if ( !parc )
- { fprintf ( stderr, "No downward connection for %d.\n", i ); }
-
- while ( parc )
- {
- fprintf ( stderr, "%d -> %d\n", i, parc->to_ed );
- parc = parc->next;
- }
+ ARC *parc;
+ parc = edge_array[i].arcs;
+
+ if ( !parc )
+ {
+ fprintf ( stderr, "No downward connection for %d.\n", i );
+ }
+
+ while ( parc )
+ {
+ fprintf ( stderr, "%d -> %d\n", i, parc->to_ed );
+ parc = parc->next;
+ }
}
@@ -362,68 +388,80 @@ Return:
*************************************************/
void linearConcatenate2 ( boolean last )
{
- unsigned int i;
- int conc_c = 1;
- int counter;
- unsigned int from_ed, to_ed, bal_ed;
- ARC * parc, *parc2;
- unsigned int bal_fe;
- ARC * temp;
- int donot1 = 0;
- int round = 1;
-
- while ( conc_c )
- {
- conc_c = 0;
- counter = 0;
- donot1 = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
- { continue; }
-
- if ( edge_array[i].length > 0 )
- { counter++; }
-
- parc = edge_array[i].arcs;
-
- if ( !parc || parc->next )
- { continue; }
-
- to_ed = parc->to_ed;
- bal_ed = getTwinEdge ( to_ed );
- parc2 = edge_array[bal_ed].arcs;
-
- if ( bal_ed == to_ed || !parc2 || parc2->next )
- { continue; }
-
- from_ed = i;
-
- if ( from_ed == to_ed || from_ed == bal_ed )
- { continue; }
-
- //linear connection found
- if ( parc->multiplicity <= arcfilter )
- {
- donot1++;
- continue;
- }
-
- conc_c++;
- bal_fe = getTwinEdge ( from_ed );
- linearUpdateConnection2 ( from_ed, to_ed, 0 );
- allpathUpdateEdge2 ( from_ed, to_ed, 0, last );
- linearUpdateConnection2 ( bal_ed, bal_fe, 1 );
- allpathUpdateEdge2 ( bal_ed, bal_fe, 1, last );
- }
-
- fprintf ( stderr, "%d edge(s) concatenated in cycle %d.\n", conc_c, round++ );
-
- if ( arcfilter )
- { fprintf ( stderr, "%d edge(s) are not linearized because of arc weight is %d.\n", donot1, arcfilter ); }
- }
-
- fprintf ( stderr, "%d edge(s) in the graph.\n", counter );
+ unsigned int i;
+ int conc_c = 1;
+ int counter;
+ unsigned int from_ed, to_ed, bal_ed;
+ ARC *parc, *parc2;
+ unsigned int bal_fe;
+ ARC *temp;
+ int donot1 = 0;
+ int round = 1;
+
+ while ( conc_c )
+ {
+ conc_c = 0;
+ counter = 0;
+ donot1 = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].deleted || EdSameAsTwin ( i ) )
+ {
+ continue;
+ }
+
+ if ( edge_array[i].length > 0 )
+ {
+ counter++;
+ }
+
+ parc = edge_array[i].arcs;
+
+ if ( !parc || parc->next )
+ {
+ continue;
+ }
+
+ to_ed = parc->to_ed;
+ bal_ed = getTwinEdge ( to_ed );
+ parc2 = edge_array[bal_ed].arcs;
+
+ if ( bal_ed == to_ed || !parc2 || parc2->next )
+ {
+ continue;
+ }
+
+ from_ed = i;
+
+ if ( from_ed == to_ed || from_ed == bal_ed )
+ {
+ continue;
+ }
+
+ //linear connection found
+ if ( parc->multiplicity <= arcfilter )
+ {
+ donot1++;
+ continue;
+ }
+
+ conc_c++;
+ bal_fe = getTwinEdge ( from_ed );
+ linearUpdateConnection2 ( from_ed, to_ed, 0 );
+ allpathUpdateEdge2 ( from_ed, to_ed, 0, last );
+ linearUpdateConnection2 ( bal_ed, bal_fe, 1 );
+ allpathUpdateEdge2 ( bal_ed, bal_fe, 1, last );
+ }
+
+ fprintf ( stderr, "%d edge(s) concatenated in cycle %d.\n", conc_c, round++ );
+
+ if ( arcfilter )
+ {
+ fprintf ( stderr, "%d edge(s) are not linearized because of arc weight is %d.\n", donot1, arcfilter );
+ }
+ }
+
+ fprintf ( stderr, "%d edge(s) in the graph.\n", counter );
}
diff --git a/standardPregraph/loadGraph.c b/standardPregraph/loadGraph.c
index f9dafdd..858630f 100644
--- a/standardPregraph/loadGraph.c
+++ b/standardPregraph/loadGraph.c
@@ -1,7 +1,7 @@
/*
* loadGraph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -28,8 +28,8 @@
#define preARCBLOCKSIZE 100000
-static unsigned int loadArcs ( char * graphfile );
-static void loadContig ( char * graphfile );
+static unsigned int loadArcs ( char *graphfile );
+static void loadContig ( char *graphfile );
static int maskRepeatByArc ( unsigned avg_weight );
/*
@@ -73,179 +73,185 @@ void loadUpdatedVertex (char *graphfile)
num_vt = num_kmer;
}*/
-int uniqueLenSearch ( unsigned int * len_array, unsigned int * flag_array, int num, unsigned int target )
+int uniqueLenSearch ( unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target )
{
- int mid, low, high;
- low = 1;
- high = num;
-
- while ( low <= high )
- {
- mid = ( low + high ) / 2;
-
- if ( len_array[mid] == target )
- {
- break;
- }
- else if ( target > len_array[mid] )
- {
- low = mid + 1;
- }
- else
- {
- high = mid - 1;
- }
- }
-
- if ( low > high )
- {
- return -1;
- }
-
- //locate the first same length unflaged
- return flag_array[mid]++;
+ int mid, low, high;
+ low = 1;
+ high = num;
+
+ while ( low <= high )
+ {
+ mid = ( low + high ) / 2;
+
+ if ( len_array[mid] == target )
+ {
+ break;
+ }
+ else if ( target > len_array[mid] )
+ {
+ low = mid + 1;
+ }
+ else
+ {
+ high = mid - 1;
+ }
+ }
+
+ if ( low > high )
+ {
+ return -1;
+ }
+
+ //locate the first same length unflaged
+ return flag_array[mid]++;
}
-int lengthSearch ( unsigned int * len_array, unsigned int * flag_array, int num, unsigned int target )
+int lengthSearch ( unsigned int *len_array, unsigned int *flag_array, int num, unsigned int target )
{
- int mid, low, high, i;
- low = 1;
- high = num;
-
- while ( low <= high )
- {
- mid = ( low + high ) / 2;
-
- if ( len_array[mid] == target )
- {
- break;
- }
- else if ( target > len_array[mid] )
- {
- low = mid + 1;
- }
- else
- {
- high = mid - 1;
- }
- }
-
- if ( low > high )
- {
- return -1;
- }
-
- //locate the first same length unflaged
- if ( !flag_array[mid] )
- {
- for ( i = mid - 1; i > 0; i-- )
- {
- if ( len_array[i] != len_array[mid] || flag_array[i] )
- {
- break;
- }
- }
-
- flag_array[i + 1] = 1;
- return i + 1;
- }
- else
- {
- for ( i = mid + 1; i <= num; i++ )
- {
- if ( !flag_array[i] )
- {
- break;
- }
- }
-
- flag_array[i] = 1;
- return i;
- }
+ int mid, low, high, i;
+ low = 1;
+ high = num;
+
+ while ( low <= high )
+ {
+ mid = ( low + high ) / 2;
+
+ if ( len_array[mid] == target )
+ {
+ break;
+ }
+ else if ( target > len_array[mid] )
+ {
+ low = mid + 1;
+ }
+ else
+ {
+ high = mid - 1;
+ }
+ }
+
+ if ( low > high )
+ {
+ return -1;
+ }
+
+ //locate the first same length unflaged
+ if ( !flag_array[mid] )
+ {
+ for ( i = mid - 1; i > 0; i-- )
+ {
+ if ( len_array[i] != len_array[mid] || flag_array[i] )
+ {
+ break;
+ }
+ }
+
+ flag_array[i + 1] = 1;
+ return i + 1;
+ }
+ else
+ {
+ for ( i = mid + 1; i <= num; i++ )
+ {
+ if ( !flag_array[i] )
+ {
+ break;
+ }
+ }
+
+ flag_array[i] = 1;
+ return i;
+ }
}
-void quick_sort_int ( unsigned int * length_array, int low, int high )
+void quick_sort_int ( unsigned int *length_array, int low, int high )
{
- int i, j;
- unsigned int pivot;
-
- if ( low < high )
- {
- pivot = length_array[low];
- i = low;
- j = high;
-
- while ( i < j )
- {
- while ( i < j && length_array[j] >= pivot )
- {
- j--;
- }
-
- if ( i < j )
- {
- length_array[i++] = length_array[j];
- }
-
- while ( i < j && length_array[i] <= pivot )
- {
- i++;
- }
-
- if ( i < j )
- {
- length_array[j--] = length_array[i];
- }
- }
-
- length_array[i] = pivot;
- quick_sort_int ( length_array, low, i - 1 );
- quick_sort_int ( length_array, i + 1, high );
- }
+ int i, j;
+ unsigned int pivot;
+
+ if ( low < high )
+ {
+ pivot = length_array[low];
+ i = low;
+ j = high;
+
+ while ( i < j )
+ {
+ while ( i < j && length_array[j] >= pivot )
+ {
+ j--;
+ }
+
+ if ( i < j )
+ {
+ length_array[i++] = length_array[j];
+ }
+
+ while ( i < j && length_array[i] <= pivot )
+ {
+ i++;
+ }
+
+ if ( i < j )
+ {
+ length_array[j--] = length_array[i];
+ }
+ }
+
+ length_array[i] = pivot;
+ quick_sort_int ( length_array, low, i - 1 );
+ quick_sort_int ( length_array, i + 1, high );
+ }
}
static int maskRepeatByArc ( unsigned avg_weight )
{
- unsigned int i, bal_i;
- int counter = 0;
- int arc_num;
- unsigned int arc_weight1, arc_weight2;
- preARC * arc;
-
- for ( i = 1; i <= num_ctg; ++i )
- {
- if ( contig_array[i].mask == 1 )
- {
- if ( isSmallerThanTwin ( i ) )
- {
- ++i;
- }
-
- continue;
- }
-
- bal_i = getTwinCtg ( i );
- arc = contig_array[bal_i].arcs;
- arc_weight1 = maxArcWeight ( arc );
- arc = contig_array[i].arcs;
- arc_weight2 = maxArcWeight ( arc );
-
- if ( arc_weight1 + arc_weight2 >= 4 * avg_weight )
- {
- contig_array[i].mask = 1;
- contig_array[bal_i].mask = 1;
-
- if ( i == bal_i ) { counter += 1; }
- else { counter += 2; }
- }
-
- if ( isSmallerThanTwin ( i ) )
- {
- ++i;
- }
- }
-
- return counter;
+ unsigned int i, bal_i;
+ int counter = 0;
+ int arc_num;
+ unsigned int arc_weight1, arc_weight2;
+ preARC *arc;
+
+ for ( i = 1; i <= num_ctg; ++i )
+ {
+ if ( contig_array[i].mask == 1 )
+ {
+ if ( isSmallerThanTwin ( i ) )
+ {
+ ++i;
+ }
+
+ continue;
+ }
+
+ bal_i = getTwinCtg ( i );
+ arc = contig_array[bal_i].arcs;
+ arc_weight1 = maxArcWeight ( arc );
+ arc = contig_array[i].arcs;
+ arc_weight2 = maxArcWeight ( arc );
+
+ if ( arc_weight1 + arc_weight2 >= 4 * avg_weight )
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+
+ if ( i == bal_i )
+ {
+ counter += 1;
+ }
+ else
+ {
+ counter += 2;
+ }
+ }
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ ++i;
+ }
+ }
+
+ return counter;
}
/*************************************************
@@ -260,234 +266,264 @@ static int maskRepeatByArc ( unsigned avg_weight )
Return:
None.
*************************************************/
-void loadUpdatedEdges ( char * graphfile )
+void loadUpdatedEdges ( char *graphfile )
{
- char c, name[256], line[1024];
- int bal_ed, cvg;
- FILE * fp, *out_fp;
- Kmer from_kmer, to_kmer;
- unsigned int num_ctgge, length, index = 0, num_kmer;
- unsigned int i = 0, j;
- int newIndex;
- unsigned int * length_array, *flag_array, diff_len;
- char * outfile = graphfile;
- long long cvgSum = 0;
- long long counter = 0;
- unsigned int avg_arc_wt;
- int ctg_short_cutoff;
- float high_cvg_cutoff1, high_cvg_cutoff2, low_cvg_cutoff;
- int cut_len;
- //get overlaplen from *.preGraphBasic
- sprintf ( name, "%s.preGraphBasic", graphfile );
- fp = ckopen ( name, "r" );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'V' )
- {
- sscanf ( line + 6, "%d %c %d", &num_kmer, &c, &overlaplen );
- fprintf ( stderr, "Kmer size: %d\n", overlaplen );
- break;
- }
- }
-
- cut_len = COMPATIBLE_MODE == 0 ? overlaplen : 0;
-
- if ( ctg_short == 0 )
- {
- ctg_short = overlaplen + 2;
- }
-
- ctg_short_cutoff = 2 * overlaplen + 2 < 100 ? 100 : 0;
- fclose ( fp );
- sprintf ( name, "%s.updated.edge", graphfile );
- fp = ckopen ( name, "r" );
- sprintf ( name, "%s.newContigIndex", outfile );
- out_fp = ckopen ( name, "w" );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'E' )
- {
- sscanf ( line + 5, "%d", &num_ctgge );
- fprintf ( stderr, "There are %d edge(s) in edge file.\n", num_ctgge );
- break;
- }
- }
-
- index_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
- length_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
- flag_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == '>' )
- {
- sscanf ( line + 7, "%d", &length );
- index_array[++index] = length;
- length_array[++i] = length;
- }
- }
-
- num_ctg = index;
- orig2new = 1;
- qsort ( & ( length_array[1] ), num_ctg, sizeof ( length_array[0] ), cmp_int );
- //extract unique length
- diff_len = 0;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- for ( j = i + 1; j <= num_ctg; j++ )
- if ( length_array[j] != length_array[i] )
- {
- break;
- }
-
- length_array[++diff_len] = length_array[i];
- flag_array[diff_len] = i;
- i = j - 1;
- }
-
- contig_array = ( CONTIG * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( CONTIG ) );
- //load edges
- index = 0;
- rewind ( fp );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == '>' )
- {
- sscanf ( line, ">length %u,%d,%d", &length, &bal_ed, &cvg );
- newIndex = uniqueLenSearch ( length_array, flag_array, diff_len, length );
- index_array[++index] = newIndex;
-
- if ( length != 0 ) { contig_array[newIndex].length = length - cut_len; }
- else { contig_array[newIndex].length = 0; }
-
- contig_array[newIndex].bal_edge = bal_ed + 1;
- contig_array[newIndex].downwardConnect = NULL;
- contig_array[newIndex].mask = 0;
- contig_array[newIndex].flag = 0;
- contig_array[newIndex].arcs = NULL;
- contig_array[newIndex].seq = NULL;
- contig_array[newIndex].multi = 0;
- contig_array[newIndex].inSubGraph = 0;
- contig_array[newIndex].bubbleInScaff = 0;
- contig_array[newIndex].cvg = cvg / 10;
-
- if ( cvg && length > 100 )
- {
- counter += length - cut_len;
- cvgSum += cvg * ( length - cut_len );
- }
-
- fprintf ( out_fp, "%d %d %d\n", index, newIndex, contig_array[newIndex].bal_edge );
- }
- }
-
- if ( counter )
- {
- cvgAvg = cvgSum / counter / 10 > 2 ? cvgSum / counter / 10 : 3;
- }
-
- //mark repeats
- int bal_i;
-
- if ( maskRep )
- {
- high_cvg_cutoff1 = cvg_high * cvgAvg;
- high_cvg_cutoff2 = cvg_high * cvgAvg * 0.8;
- low_cvg_cutoff = cvg_low * cvgAvg;
- counter = 0;
- fprintf ( stderr, "Mask contigs with coverage lower than %.1f or higher than %.1f, and strict length %d.\n", low_cvg_cutoff, high_cvg_cutoff1, ctg_short_cutoff );
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- bal_i = getTwinCtg ( i );
-
- if ( ( contig_array[i].cvg + contig_array[bal_i].cvg ) > 2 * high_cvg_cutoff1 )
- {
- contig_array[i].mask = 1;
- contig_array[bal_i].mask = 1;
-
- if ( i == bal_i ) { counter += 1; }
- else { counter += 2; }
- }
- else if ( contig_array[i].length < ctg_short_cutoff && ( contig_array[i].cvg > high_cvg_cutoff2 || contig_array[bal_i].cvg > high_cvg_cutoff2 || ( contig_array[i].cvg < low_cvg_cutoff && contig_array[bal_i].cvg < low_cvg_cutoff ) ) )
- {
- contig_array[i].mask = 1;
- contig_array[bal_i].mask = 1;
-
- if ( i == bal_i ) { counter += 1; }
- else { counter += 2; }
- }
- else if ( cvgAvg < 50 && ( contig_array[i].cvg >= 63 || contig_array[bal_i].cvg >= 63 ) )
- {
- contig_array[i].mask = 1;
- contig_array[bal_i].mask = 1;
-
- if ( i == bal_i ) { counter += 1; }
- else { counter += 2; }
- }
-
- if ( isSmallerThanTwin ( i ) )
- {
- i++;
- }
- }
-
- fprintf ( stderr, "Average contig coverage is %d, %lld contig(s) masked.\n", cvgAvg, counter );
- }
-
- counter = 0;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].mask )
- {
- continue;
- }
-
- bal_i = getTwinCtg ( i );
-
- if ( contig_array[i].length < ctg_short )
- {
- contig_array[i].mask = 1;
- contig_array[bal_i].mask = 1;
-
- if ( i == bal_i ) { counter += 1; }
- else { counter += 2; }
- }
-
- if ( isSmallerThanTwin ( i ) )
- {
- i++;
- }
- }
-
- fprintf ( stderr, "Mask contigs shorter than %d, %lld contig(s) masked.\n", ctg_short, counter );
- avg_arc_wt = loadArcs ( graphfile );
- counter = 0;
- //counter = maskRepeatByArc(avg_arc_wt);
- //printf ("Mask contigs with multi arcs, %d contig masked\n", counter);
- //tipsCount();
- loadContig ( graphfile );
- fprintf ( stderr, "Done loading updated edges.\n" );
- free ( ( void * ) length_array );
- free ( ( void * ) flag_array );
- fclose ( fp );
- fclose ( out_fp );
+ char c, name[256], line[1024];
+ int bal_ed, cvg;
+ FILE *fp, *out_fp;
+ Kmer from_kmer, to_kmer;
+ unsigned int num_ctgge, length, index = 0, num_kmer;
+ unsigned int i = 0, j;
+ int newIndex;
+ unsigned int *length_array, *flag_array, diff_len;
+ char *outfile = graphfile;
+ long long cvgSum = 0;
+ long long counter = 0;
+ unsigned int avg_arc_wt;
+ int ctg_short_cutoff;
+ float high_cvg_cutoff1, high_cvg_cutoff2, low_cvg_cutoff;
+ int cut_len;
+ //get overlaplen from *.preGraphBasic
+ sprintf ( name, "%s.preGraphBasic", graphfile );
+ fp = ckopen ( name, "r" );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'V' )
+ {
+ sscanf ( line + 6, "%d %c %d", &num_kmer, &c, &overlaplen );
+ fprintf ( stderr, "Kmer size: %d\n", overlaplen );
+ break;
+ }
+ }
+
+ cut_len = COMPATIBLE_MODE == 0 ? overlaplen : 0;
+
+ if ( ctg_short == 0 )
+ {
+ ctg_short = overlaplen + 2;
+ }
+
+ ctg_short_cutoff = 2 * overlaplen + 2 < 100 ? 100 : 0;
+ fclose ( fp );
+ sprintf ( name, "%s.updated.edge", graphfile );
+ fp = ckopen ( name, "r" );
+ sprintf ( name, "%s.newContigIndex", outfile );
+ out_fp = ckopen ( name, "w" );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'E' )
+ {
+ sscanf ( line + 5, "%d", &num_ctgge );
+ fprintf ( stderr, "There are %d edge(s) in edge file.\n", num_ctgge );
+ break;
+ }
+ }
+
+ index_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
+ length_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
+ flag_array = ( unsigned int * ) ckalloc ( ( num_ctgge + 1 ) * sizeof ( unsigned int ) );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ sscanf ( line + 7, "%d", &length );
+ index_array[++index] = length;
+ length_array[++i] = length;
+ }
+ }
+
+ num_ctg = index;
+ orig2new = 1;
+ qsort ( & ( length_array[1] ), num_ctg, sizeof ( length_array[0] ), cmp_int );
+ //extract unique length
+ diff_len = 0;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ for ( j = i + 1; j <= num_ctg; j++ )
+ if ( length_array[j] != length_array[i] )
+ {
+ break;
+ }
+
+ length_array[++diff_len] = length_array[i];
+ flag_array[diff_len] = i;
+ i = j - 1;
+ }
+
+ contig_array = ( CONTIG * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( CONTIG ) );
+ //load edges
+ index = 0;
+ rewind ( fp );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ sscanf ( line, ">length %u,%d,%d", &length, &bal_ed, &cvg );
+ newIndex = uniqueLenSearch ( length_array, flag_array, diff_len, length );
+ index_array[++index] = newIndex;
+
+ if ( length != 0 )
+ {
+ contig_array[newIndex].length = length - cut_len;
+ }
+ else
+ {
+ contig_array[newIndex].length = 0;
+ }
+
+ contig_array[newIndex].bal_edge = bal_ed + 1;
+ contig_array[newIndex].downwardConnect = NULL;
+ contig_array[newIndex].mask = 0;
+ contig_array[newIndex].flag = 0;
+ contig_array[newIndex].arcs = NULL;
+ contig_array[newIndex].seq = NULL;
+ contig_array[newIndex].multi = 0;
+ contig_array[newIndex].inSubGraph = 0;
+ contig_array[newIndex].bubbleInScaff = 0;
+ contig_array[newIndex].cvg = cvg / 10;
+
+ if ( cvg && length > 100 )
+ {
+ counter += length - cut_len;
+ cvgSum += cvg * ( length - cut_len );
+ }
+
+ fprintf ( out_fp, "%d %d %d\n", index, newIndex, contig_array[newIndex].bal_edge );
+ }
+ }
+
+ if ( counter )
+ {
+ cvgAvg = cvgSum / counter / 10 > 2 ? cvgSum / counter / 10 : 3;
+ }
+
+ //mark repeats
+ int bal_i;
+
+ if ( maskRep )
+ {
+ high_cvg_cutoff1 = cvg_high * cvgAvg;
+ high_cvg_cutoff2 = cvg_high * cvgAvg * 0.8;
+ low_cvg_cutoff = cvg_low * cvgAvg;
+ counter = 0;
+ fprintf ( stderr, "Mask contigs with coverage lower than %.1f or higher than %.1f, and strict length %d.\n", low_cvg_cutoff, high_cvg_cutoff1, ctg_short_cutoff );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ bal_i = getTwinCtg ( i );
+
+ if ( ( contig_array[i].cvg + contig_array[bal_i].cvg ) > 2 * high_cvg_cutoff1 )
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+
+ if ( i == bal_i )
+ {
+ counter += 1;
+ }
+ else
+ {
+ counter += 2;
+ }
+ }
+ else if ( contig_array[i].length < ctg_short_cutoff && ( contig_array[i].cvg > high_cvg_cutoff2 || contig_array[bal_i].cvg > high_cvg_cutoff2 || ( contig_array[i].cvg < low_cvg_cutoff && contig_array[bal_i].cvg < low_cvg_cutoff ) ) )
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+
+ if ( i == bal_i )
+ {
+ counter += 1;
+ }
+ else
+ {
+ counter += 2;
+ }
+ }
+ else if ( cvgAvg < 50 && ( contig_array[i].cvg >= 63 || contig_array[bal_i].cvg >= 63 ) )
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+
+ if ( i == bal_i )
+ {
+ counter += 1;
+ }
+ else
+ {
+ counter += 2;
+ }
+ }
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ fprintf ( stderr, "Average contig coverage is %d, %lld contig(s) masked.\n", cvgAvg, counter );
+ }
+
+ counter = 0;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].mask )
+ {
+ continue;
+ }
+
+ bal_i = getTwinCtg ( i );
+
+ if ( contig_array[i].length < ctg_short )
+ {
+ contig_array[i].mask = 1;
+ contig_array[bal_i].mask = 1;
+
+ if ( i == bal_i )
+ {
+ counter += 1;
+ }
+ else
+ {
+ counter += 2;
+ }
+ }
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ fprintf ( stderr, "Mask contigs shorter than %d, %lld contig(s) masked.\n", ctg_short, counter );
+ avg_arc_wt = loadArcs ( graphfile );
+ counter = 0;
+ //counter = maskRepeatByArc(avg_arc_wt);
+ //printf ("Mask contigs with multi arcs, %d contig masked\n", counter);
+ //tipsCount();
+ loadContig ( graphfile );
+ fprintf ( stderr, "Done loading updated edges.\n" );
+ free ( ( void * ) length_array );
+ free ( ( void * ) flag_array );
+ fclose ( fp );
+ fclose ( out_fp );
}
static void add1Arc ( unsigned int from_ed, unsigned int to_ed, unsigned int weight )
{
- preARC * parc;
- unsigned int from_c = index_array[from_ed];
- unsigned int to_c = index_array[to_ed];
- parc = allocatePreArc ( to_c );
- parc->multiplicity = weight;
- parc->next = contig_array[from_c].arcs;
- contig_array[from_c].arcs = parc;
+ preARC *parc;
+ unsigned int from_c = index_array[from_ed];
+ unsigned int to_c = index_array[to_ed];
+ parc = allocatePreArc ( to_c );
+ parc->multiplicity = weight;
+ parc->next = contig_array[from_c].arcs;
+ contig_array[from_c].arcs = parc;
}
/*************************************************
@@ -502,48 +538,48 @@ static void add1Arc ( unsigned int from_ed, unsigned int to_ed, unsigned int wei
Return:
The average weight of arcs.
*************************************************/
-static unsigned int loadArcs ( char * graphfile )
+static unsigned int loadArcs ( char *graphfile )
{
- FILE * fp;
- char name[256], line[1024];
- unsigned int target, weight;
- unsigned int from_ed;
- char * seg;
- unsigned int avg_weight = 0, weight_sum = 0, arc_num = 0;
- sprintf ( name, "%s.Arc", graphfile );
- fp = ckopen ( name, "r" );
- createPreArcMemManager ();
- arcCounter = 0;
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- seg = strtok ( line, " " );
- from_ed = atoi ( seg );
-
- //printf("%d\n",from_ed);
- while ( ( seg = strtok ( NULL, " " ) ) != NULL )
- {
- target = atoi ( seg );
- seg = strtok ( NULL, " " );
- weight = atoi ( seg );
- add1Arc ( from_ed, target, weight );
-
- if ( !contig_array[index_array[from_ed]].mask && !contig_array[index_array[target]].mask )
- {
- weight_sum += weight;
- ++arc_num;
- }
- }
- }
-
- if ( arc_num )
- {
- avg_weight = weight_sum / arc_num;
- }
-
- fprintf ( stderr, "%lld arc(s) loaded, average weight is %u.\n", arcCounter, avg_weight );
- fclose ( fp );
- return avg_weight;
+ FILE *fp;
+ char name[256], line[1024];
+ unsigned int target, weight;
+ unsigned int from_ed;
+ char *seg;
+ unsigned int avg_weight = 0, weight_sum = 0, arc_num = 0;
+ sprintf ( name, "%s.Arc", graphfile );
+ fp = ckopen ( name, "r" );
+ createPreArcMemManager ();
+ arcCounter = 0;
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ seg = strtok ( line, " " );
+ from_ed = atoi ( seg );
+
+ //printf("%d\n",from_ed);
+ while ( ( seg = strtok ( NULL, " " ) ) != NULL )
+ {
+ target = atoi ( seg );
+ seg = strtok ( NULL, " " );
+ weight = atoi ( seg );
+ add1Arc ( from_ed, target, weight );
+
+ if ( !contig_array[index_array[from_ed]].mask && !contig_array[index_array[target]].mask )
+ {
+ weight_sum += weight;
+ ++arc_num;
+ }
+ }
+ }
+
+ if ( arc_num )
+ {
+ avg_weight = weight_sum / arc_num;
+ }
+
+ fprintf ( stderr, "%lld arc(s) loaded, average weight is %u.\n", arcCounter, avg_weight );
+ fclose ( fp );
+ return avg_weight;
}
/*************************************************
@@ -558,85 +594,85 @@ static unsigned int loadArcs ( char * graphfile )
Return:
None.
*************************************************/
-void loadContig ( char * graphfile )
+void loadContig ( char *graphfile )
{
- char c, name[256], line[1024], *tightSeq = NULL;
- FILE * fp;
- int n = 0, length, index = -1, edgeno;
- unsigned int i;
- unsigned int newIndex;
- sprintf ( name, "%s.contig", graphfile );
- fp = ckopen ( name, "r" );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == '>' )
- {
- if ( index >= 0 )
- {
- newIndex = index_array[edgeno];
- contig_array[newIndex].seq = tightSeq;
- }
-
- n = 0;
- index++;
- sscanf ( line + 1, "%d %s %d", &edgeno, name, &length );
- //printf("contig %d, length %d\n",edgeno,length);
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
- }
- else
- {
- for ( i = 0; i < strlen ( line ); i++ )
- {
- if ( line[i] >= 'a' && line[i] <= 'z' )
- {
- c = base2int ( line[i] - 'a' + 'A' );
- writeChar2tightString ( c, tightSeq, n++ );
- }
- else if ( line[i] >= 'A' && line[i] <= 'Z' )
- {
- c = base2int ( line[i] );
- writeChar2tightString ( c, tightSeq, n++ );
- }
- }
- }
- }
-
- if ( index >= 0 )
- {
- newIndex = index_array[edgeno];
- contig_array[newIndex].seq = tightSeq;
- }
-
- fprintf ( stderr, "%d contig(s) loaded.\n", index + 1 );
- fclose ( fp );
- //printf("the %dth contig with index 107\n",index);
+ char c, name[256], line[1024], *tightSeq = NULL;
+ FILE *fp;
+ int n = 0, length, index = -1, edgeno;
+ unsigned int i;
+ unsigned int newIndex;
+ sprintf ( name, "%s.contig", graphfile );
+ fp = ckopen ( name, "r" );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ if ( index >= 0 )
+ {
+ newIndex = index_array[edgeno];
+ contig_array[newIndex].seq = tightSeq;
+ }
+
+ n = 0;
+ index++;
+ sscanf ( line + 1, "%d %s %d", &edgeno, name, &length );
+ //printf("contig %d, length %d\n",edgeno,length);
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+ }
+ else
+ {
+ for ( i = 0; i < strlen ( line ); i++ )
+ {
+ if ( line[i] >= 'a' && line[i] <= 'z' )
+ {
+ c = base2int ( line[i] - 'a' + 'A' );
+ writeChar2tightString ( c, tightSeq, n++ );
+ }
+ else if ( line[i] >= 'A' && line[i] <= 'Z' )
+ {
+ c = base2int ( line[i] );
+ writeChar2tightString ( c, tightSeq, n++ );
+ }
+ }
+ }
+ }
+
+ if ( index >= 0 )
+ {
+ newIndex = index_array[edgeno];
+ contig_array[newIndex].seq = tightSeq;
+ }
+
+ fprintf ( stderr, "%d contig(s) loaded.\n", index + 1 );
+ fclose ( fp );
+ //printf("the %dth contig with index 107\n",index);
}
void freeContig_array ()
{
- if ( !contig_array )
- {
- return;
- }
-
- unsigned int i;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].seq )
- {
- free ( ( void * ) contig_array[i].seq );
- }
-
- if ( contig_array[i].closeReads )
- {
- freeStack ( contig_array[i].closeReads );
- }
- }
-
- free ( ( void * ) contig_array );
- contig_array = NULL;
+ if ( !contig_array )
+ {
+ return;
+ }
+
+ unsigned int i;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].seq )
+ {
+ free ( ( void * ) contig_array[i].seq );
+ }
+
+ if ( contig_array[i].closeReads )
+ {
+ freeStack ( contig_array[i].closeReads );
+ }
+ }
+
+ free ( ( void * ) contig_array );
+ contig_array = NULL;
}
/*
diff --git a/standardPregraph/loadPath.c b/standardPregraph/loadPath.c
index 38d280a..5113da4 100644
--- a/standardPregraph/loadPath.c
+++ b/standardPregraph/loadPath.c
@@ -1,7 +1,7 @@
/*
* loadPath.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -41,16 +41,16 @@ Return:
*************************************************/
static void add1marker2edge ( unsigned int edgeno, long long readid )
{
- if ( edge_array[edgeno].multi == 255 )
- {
- return;
- }
-
- unsigned int bal_ed = getTwinEdge ( edgeno );
- unsigned char counter = edge_array[edgeno].multi++;
- edge_array[edgeno].markers[counter] = readid;
- counter = edge_array[bal_ed].multi++;
- edge_array[bal_ed].markers[counter] = -readid;
+ if ( edge_array[edgeno].multi == 255 )
+ {
+ return;
+ }
+
+ unsigned int bal_ed = getTwinEdge ( edgeno );
+ unsigned char counter = edge_array[edgeno].multi++;
+ edge_array[edgeno].markers[counter] = readid;
+ counter = edge_array[bal_ed].multi++;
+ edge_array[bal_ed].markers[counter] = -readid;
}
/*************************************************
@@ -66,133 +66,139 @@ Output:
Return:
None.
*************************************************/
-boolean loadPath ( char * graphfile )
+boolean loadPath ( char *graphfile )
{
- FILE * fp;
- char name[256], line[1024];
- unsigned int i, bal_ed, num1, edgeno, num2;
- long long markCounter = 0, readid = 0;
- char * seg;
- sprintf ( name, "%s.markOnEdge", graphfile );
- fp = fopen ( name, "r" );
-
- if ( !fp )
- {
- return 0;
- }
-
- for ( i = 1; i <= num_ed; i++ )
- {
- edge_array[i].multi = 0;
- }
-
- for ( i = 1; i <= num_ed; i++ )
- {
- fscanf ( fp, "%d", &num1 );
-
- if ( EdSmallerThanTwin ( i ) )
- {
- fscanf ( fp, "%d", &num2 );
- bal_ed = getTwinEdge ( i );
-
- if ( num1 + num2 >= 255 )
- {
- edge_array[i].multi = 255;
- edge_array[bal_ed].multi = 255;
- }
- else
- {
- edge_array[i].multi = num1 + num2;
- edge_array[bal_ed].multi = num1 + num2;
- markCounter += 2 * ( num1 + num2 );
- }
-
- i++;
- }
- else
- {
- if ( 2 * num1 >= 255 )
- {
- edge_array[i].multi = 255;
- }
- else
- {
- edge_array[i].multi = 2 * num1;
- markCounter += 2 * num1;
- }
- }
- }
-
- fclose ( fp );
- fprintf ( stderr, "%lld markers overall.\n", markCounter );
- markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
- markCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].multi == 255 )
- {
- continue;
- }
-
- edge_array[i].markers = markersArray + markCounter;
- markCounter += edge_array[i].multi;
- edge_array[i].multi = 0;
- }
-
- sprintf ( name, "%s.path", graphfile );
- fp = fopen ( name, "r" );
-
- if ( !fp )
- {
- return 0;
- }
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- //printf("%s",line);
- readid++;
- seg = strtok ( line, " " );
-
- while ( seg )
- {
- edgeno = atoi ( seg );
- //printf("%s, %d\n",seg,edgeno);
- add1marker2edge ( edgeno, readid );
- seg = strtok ( NULL, " " );
- }
- }
-
- fclose ( fp );
- markCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].multi == 255 )
- {
- continue;
- }
-
- markCounter += edge_array[i].multi;
- }
-
- fprintf ( stderr, "%lld marks loaded.\n", markCounter );
- return 1;
+ FILE *fp;
+ char name[256], line[1024];
+ unsigned int i, bal_ed, num1, edgeno, num2;
+ long long markCounter = 0, readid = 0;
+ char *seg;
+ sprintf ( name, "%s.markOnEdge", graphfile );
+ fp = fopen ( name, "r" );
+
+ if ( !fp )
+ {
+ return 0;
+ }
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ edge_array[i].multi = 0;
+ }
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ fscanf ( fp, "%d", &num1 );
+
+ if ( EdSmallerThanTwin ( i ) )
+ {
+ fscanf ( fp, "%d", &num2 );
+ bal_ed = getTwinEdge ( i );
+
+ if ( num1 + num2 >= 255 )
+ {
+ edge_array[i].multi = 255;
+ edge_array[bal_ed].multi = 255;
+ }
+ else
+ {
+ edge_array[i].multi = num1 + num2;
+ edge_array[bal_ed].multi = num1 + num2;
+ markCounter += 2 * ( num1 + num2 );
+ }
+
+ i++;
+ }
+ else
+ {
+ if ( 2 * num1 >= 255 )
+ {
+ edge_array[i].multi = 255;
+ }
+ else
+ {
+ edge_array[i].multi = 2 * num1;
+ markCounter += 2 * num1;
+ }
+ }
+ }
+
+ fclose ( fp );
+ fprintf ( stderr, "%lld markers overall.\n", markCounter );
+ markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
+ markCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].multi == 255 )
+ {
+ continue;
+ }
+
+ edge_array[i].markers = markersArray + markCounter;
+ markCounter += edge_array[i].multi;
+ edge_array[i].multi = 0;
+ }
+
+ sprintf ( name, "%s.path", graphfile );
+ fp = fopen ( name, "r" );
+
+ if ( !fp )
+ {
+ return 0;
+ }
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ //printf("%s",line);
+ readid++;
+ seg = strtok ( line, " " );
+
+ while ( seg )
+ {
+ edgeno = atoi ( seg );
+ //printf("%s, %d\n",seg,edgeno);
+ add1marker2edge ( edgeno, readid );
+ seg = strtok ( NULL, " " );
+ }
+ }
+
+ fclose ( fp );
+ markCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].multi == 255 )
+ {
+ continue;
+ }
+
+ markCounter += edge_array[i].multi;
+ }
+
+ fprintf ( stderr, "%lld marks loaded.\n", markCounter );
+ return 1;
}
-static int comp ( const void * a , const void * b )
+static int comp ( const void *a , const void *b )
{
- long long m , n ;
- m = * ( long long * ) a > 0 ? * ( long long * ) a : -* ( long long * ) a;
- n = * ( long long * ) b > 0 ? * ( long long * ) b : -* ( long long * ) b;
-
- // return (int)(m-n);
- if ( m > n )
- { return 1; }
- else if ( m < n )
- { return -1; }
- else
- { return 0; }
+ long long m , n ;
+ m = * ( long long * ) a > 0 ? * ( long long * ) a : -* ( long long * ) a;
+ n = * ( long long * ) b > 0 ? * ( long long * ) b : -* ( long long * ) b;
+
+ // return (int)(m-n);
+ if ( m > n )
+ {
+ return 1;
+ }
+ else if ( m < n )
+ {
+ return -1;
+ }
+ else
+ {
+ return 0;
+ }
}
/*************************************************
@@ -209,128 +215,130 @@ Return:
0 if it's fail to load the path.
*************************************************/
-boolean loadPathBin ( char * graphfile )
+boolean loadPathBin ( char *graphfile )
{
- FILE * fp;
- char name[256];
- unsigned int i, bal_ed, num1, num2;
- long long markCounter = 0, readid = 0;
- unsigned char seg, ch;
- unsigned int * freadBuf;
- sprintf ( name, "%s.markOnEdge", graphfile );
- fp = fopen ( name, "r" );
-
- if ( !fp )
- {
- return 0;
- }
-
- for ( i = 1; i <= num_ed; i++ )
- {
- edge_array[i].multi = 0;
- edge_array[i].markers = NULL;
- }
-
- for ( i = 1; i <= num_ed; i++ )
- {
- fscanf ( fp, "%d", &num1 );
-
- if ( EdSmallerThanTwin ( i ) )
- {
- fscanf ( fp, "%d", &num2 );
- bal_ed = getTwinEdge ( i );
-
- if ( num1 + num2 >= 255 )
- {
- edge_array[i].multi = 255;
- edge_array[bal_ed].multi = 255;
- }
- else
- {
- edge_array[i].multi = num1 + num2;
- edge_array[bal_ed].multi = num1 + num2;
- markCounter += 2 * ( num1 + num2 );
- }
-
- i++;
- }
- else
- {
- if ( 2 * num1 >= 255 )
- {
- edge_array[i].multi = 255;
- }
- else
- {
- edge_array[i].multi = 2 * num1;
- markCounter += 2 * num1;
- }
- }
- }
-
- fclose ( fp );
- fprintf ( stderr, "%lld markers overall.\n", markCounter );
- markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
- markCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].multi == 255 )
- {
- continue;
- }
-
- edge_array[i].markers = markersArray + markCounter;
- markCounter += edge_array[i].multi;
- edge_array[i].multi = 0;
- }
-
- sprintf ( name, "%s.path", graphfile );
- fp = fopen ( name, "rb" );
-
- if ( !fp )
- {
- return 0;
- }
-
- freadBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) );
-
- while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 )
- {
- //printf("%s",line);
- if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch )
- {
- break;
- }
-
- readid++;
-
- for ( seg = 0; seg < ch; seg++ )
- {
- add1marker2edge ( freadBuf[seg], readid );
- }
- }
-
- fclose ( fp );
- markCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( edge_array[i].multi == 255 )
- {
- continue;
- }
-
- markCounter += edge_array[i].multi;
- }
-
- for ( i = 0; i <= num_ed; i++ )
- {
- if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 )
- { qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( long long ), comp ); }
- }
-
- fprintf ( stderr, "%lld markers loaded.\n", markCounter );
- free ( ( void * ) freadBuf );
- return 1;
+ FILE *fp;
+ char name[256];
+ unsigned int i, bal_ed, num1, num2;
+ long long markCounter = 0, readid = 0;
+ unsigned char seg, ch;
+ unsigned int *freadBuf;
+ sprintf ( name, "%s.markOnEdge", graphfile );
+ fp = fopen ( name, "r" );
+
+ if ( !fp )
+ {
+ return 0;
+ }
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ edge_array[i].multi = 0;
+ edge_array[i].markers = NULL;
+ }
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ fscanf ( fp, "%d", &num1 );
+
+ if ( EdSmallerThanTwin ( i ) )
+ {
+ fscanf ( fp, "%d", &num2 );
+ bal_ed = getTwinEdge ( i );
+
+ if ( num1 + num2 >= 255 )
+ {
+ edge_array[i].multi = 255;
+ edge_array[bal_ed].multi = 255;
+ }
+ else
+ {
+ edge_array[i].multi = num1 + num2;
+ edge_array[bal_ed].multi = num1 + num2;
+ markCounter += 2 * ( num1 + num2 );
+ }
+
+ i++;
+ }
+ else
+ {
+ if ( 2 * num1 >= 255 )
+ {
+ edge_array[i].multi = 255;
+ }
+ else
+ {
+ edge_array[i].multi = 2 * num1;
+ markCounter += 2 * num1;
+ }
+ }
+ }
+
+ fclose ( fp );
+ fprintf ( stderr, "%lld markers overall.\n", markCounter );
+ markersArray = ( long long * ) ckalloc ( markCounter * sizeof ( long long ) );
+ markCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].multi == 255 )
+ {
+ continue;
+ }
+
+ edge_array[i].markers = markersArray + markCounter;
+ markCounter += edge_array[i].multi;
+ edge_array[i].multi = 0;
+ }
+
+ sprintf ( name, "%s.path", graphfile );
+ fp = fopen ( name, "rb" );
+
+ if ( !fp )
+ {
+ return 0;
+ }
+
+ freadBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) );
+
+ while ( fread ( &ch, sizeof ( char ), 1, fp ) == 1 )
+ {
+ //printf("%s",line);
+ if ( fread ( freadBuf, sizeof ( unsigned int ), ch, fp ) != ch )
+ {
+ break;
+ }
+
+ readid++;
+
+ for ( seg = 0; seg < ch; seg++ )
+ {
+ add1marker2edge ( freadBuf[seg], readid );
+ }
+ }
+
+ fclose ( fp );
+ markCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].multi == 255 )
+ {
+ continue;
+ }
+
+ markCounter += edge_array[i].multi;
+ }
+
+ for ( i = 0; i <= num_ed; i++ )
+ {
+ if ( edge_array[i].multi >= 2 && edge_array[i].multi != 255 )
+ {
+ qsort ( edge_array[i].markers, ( int ) edge_array[i].multi, sizeof ( long long ), comp );
+ }
+ }
+
+ fprintf ( stderr, "%lld markers loaded.\n", markCounter );
+ free ( ( void * ) freadBuf );
+ return 1;
}
diff --git a/standardPregraph/loadPreGraph.c b/standardPregraph/loadPreGraph.c
index 1422eab..1933e88 100644
--- a/standardPregraph/loadPreGraph.c
+++ b/standardPregraph/loadPreGraph.c
@@ -1,7 +1,7 @@
/*
* loadPreGraph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,26 +27,26 @@
#include "extvab.h"
#include "zlib.h"
-static void loadPreArcs ( char * graphfile );
+static void loadPreArcs ( char *graphfile );
-int cmp_vertex ( const void * a, const void * b )
+int cmp_vertex ( const void *a, const void *b )
{
- VERTEX * A, *B;
- A = ( VERTEX * ) a;
- B = ( VERTEX * ) b;
-
- if ( KmerLarger ( A->kmer, B->kmer ) )
- {
- return 1;
- }
- else if ( KmerEqual ( A->kmer, B->kmer ) )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ VERTEX *A, *B;
+ A = ( VERTEX * ) a;
+ B = ( VERTEX * ) b;
+
+ if ( KmerLarger ( A->kmer, B->kmer ) )
+ {
+ return 1;
+ }
+ else if ( KmerEqual ( A->kmer, B->kmer ) )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
/*************************************************
@@ -62,88 +62,88 @@ Output:
Return:
None.
*************************************************/
-void loadVertex ( char * graphfile )
+void loadVertex ( char *graphfile )
{
- char name[256], line[256];
- FILE * fp;
- Kmer word, bal_word, temp;
- int num_kmer, i;
- char ch;
- sprintf ( name, "%s.preGraphBasic", graphfile );
- fp = ckopen ( name, "r" );
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'V' )
- {
- sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen );
- fprintf ( stderr, "There are %d kmer(s) in vertex file.\n", num_kmer );
- }
- else if ( line[0] == 'E' )
- {
- sscanf ( line + 5, "%d", &num_ed );
- fprintf ( stderr, "There are %d edge(s) in edge file.\n", num_ed );
- }
- else if ( line[0] == 'M' )
- {
- sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen );
- }
- else if ( line[0] == 'B' )
- {
- if ( line[7] == 'V' )
- {
- sscanf ( line, "Backup VERTEX %d %c %d", &num_kmer, &ch, &overlaplen );
- fprintf ( stderr, "Backup there are %d kmer(s) in vertex file.\n", num_kmer );
- }
- else if ( line[7] == 'E' )
- {
- sscanf ( line, "Backup EDGEs %d", &num_ed );
- fprintf ( stderr, "Backup there are %d edge(s) in edge file.\n", num_ed );
- }
- else if ( line[7] == 'M' )
- {
- sscanf ( line, "Backup MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen );
- }
- }
- }
-
- fclose ( fp );
- vt_array = ( VERTEX * ) ckalloc ( ( 4 * num_kmer ) * sizeof ( VERTEX ) );
- num_kmer_limit = 4 * num_kmer;
- sprintf ( name, "%s.vertex", graphfile );
- fp = ckopen ( name, "r" );
-
- for ( i = 0; i < num_kmer; i++ )
- {
+ char name[256], line[256];
+ FILE *fp;
+ Kmer word, bal_word, temp;
+ int num_kmer, i;
+ char ch;
+ sprintf ( name, "%s.preGraphBasic", graphfile );
+ fp = ckopen ( name, "r" );
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'V' )
+ {
+ sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen );
+ fprintf ( stderr, "There are %d kmer(s) in vertex file.\n", num_kmer );
+ }
+ else if ( line[0] == 'E' )
+ {
+ sscanf ( line + 5, "%d", &num_ed );
+ fprintf ( stderr, "There are %d edge(s) in edge file.\n", num_ed );
+ }
+ else if ( line[0] == 'M' )
+ {
+ sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen );
+ }
+ else if ( line[0] == 'B' )
+ {
+ if ( line[7] == 'V' )
+ {
+ sscanf ( line, "Backup VERTEX %d %c %d", &num_kmer, &ch, &overlaplen );
+ fprintf ( stderr, "Backup there are %d kmer(s) in vertex file.\n", num_kmer );
+ }
+ else if ( line[7] == 'E' )
+ {
+ sscanf ( line, "Backup EDGEs %d", &num_ed );
+ fprintf ( stderr, "Backup there are %d edge(s) in edge file.\n", num_ed );
+ }
+ else if ( line[7] == 'M' )
+ {
+ sscanf ( line, "Backup MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen );
+ }
+ }
+ }
+
+ fclose ( fp );
+ vt_array = ( VERTEX * ) ckalloc ( ( 4 * num_kmer ) * sizeof ( VERTEX ) );
+ num_kmer_limit = 4 * num_kmer;
+ sprintf ( name, "%s.vertex", graphfile );
+ fp = ckopen ( name, "r" );
+
+ for ( i = 0; i < num_kmer; i++ )
+ {
#ifdef MER127
- fscanf ( fp, "%llx %llx %llx %llx", & ( word.high1 ), & ( word.low1 ), & ( word.high2 ), & ( word.low2 ) );
+ fscanf ( fp, "%llx %llx %llx %llx", & ( word.high1 ), & ( word.low1 ), & ( word.high2 ), & ( word.low2 ) );
#else
- fscanf ( fp, "%llx %llx", & ( word.high ), & ( word.low ) );
+ fscanf ( fp, "%llx %llx", & ( word.high ), & ( word.low ) );
#endif
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- vt_array[i].kmer = word;
- }
- else
- {
- vt_array[i].kmer = bal_word;
- }
- }
-
- temp = vt_array[num_kmer - 1].kmer;
- qsort ( &vt_array[0], num_kmer, sizeof ( vt_array[0] ), cmp_vertex );
- fprintf ( stderr, "Kmers sorted.\n" );
- fclose ( fp );
-
- for ( i = 0; i < num_kmer; i++ )
- {
- bal_word = reverseComplement ( vt_array[i].kmer, overlaplen );
- vt_array[i + num_kmer].kmer = bal_word;
- }
-
- num_vt = num_kmer;
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ vt_array[i].kmer = word;
+ }
+ else
+ {
+ vt_array[i].kmer = bal_word;
+ }
+ }
+
+ temp = vt_array[num_kmer - 1].kmer;
+ qsort ( &vt_array[0], num_kmer, sizeof ( vt_array[0] ), cmp_vertex );
+ fprintf ( stderr, "Kmers sorted.\n" );
+ fclose ( fp );
+
+ for ( i = 0; i < num_kmer; i++ )
+ {
+ bal_word = reverseComplement ( vt_array[i].kmer, overlaplen );
+ vt_array[i + num_kmer].kmer = bal_word;
+ }
+
+ num_vt = num_kmer;
}
/*************************************************
@@ -160,38 +160,38 @@ Output:
Return:
The kmer's index in array.
*************************************************/
-int bisearch ( VERTEX * vts, int num, Kmer target )
+int bisearch ( VERTEX *vts, int num, Kmer target )
{
- int mid, low, high;
- low = 0;
- high = num - 1;
-
- while ( low <= high )
- {
- mid = ( low + high ) / 2;
-
- if ( KmerEqual ( vts[mid].kmer, target ) )
- {
- break;
- }
- else if ( KmerLarger ( target, vts[mid].kmer ) )
- {
- low = mid + 1;
- }
- else
- {
- high = mid - 1;
- }
- }
-
- if ( low <= high )
- {
- return mid;
- }
- else
- {
- return -1;
- }
+ int mid, low, high;
+ low = 0;
+ high = num - 1;
+
+ while ( low <= high )
+ {
+ mid = ( low + high ) / 2;
+
+ if ( KmerEqual ( vts[mid].kmer, target ) )
+ {
+ break;
+ }
+ else if ( KmerLarger ( target, vts[mid].kmer ) )
+ {
+ low = mid + 1;
+ }
+ else
+ {
+ high = mid - 1;
+ }
+ }
+
+ if ( low <= high )
+ {
+ return mid;
+ }
+ else
+ {
+ return -1;
+ }
}
/*************************************************
@@ -208,54 +208,54 @@ Return:
*************************************************/
int kmer2vt ( Kmer kmer )
{
- Kmer bal_word;
- int vt_id;
- bal_word = reverseComplement ( kmer, overlaplen );
-
- if ( KmerSmaller ( kmer, bal_word ) )
- {
- vt_id = bisearch ( &vt_array[0], num_vt, kmer );
-
- if ( vt_id < 0 )
- {
- fprintf ( stderr, "There is no vertex for kmer " );
- PrintKmer ( stderr, kmer );
- fprintf ( stderr, " .\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"There is not the vertex for kmer %llx %llx %llx %llx.\n", kmer.high1, kmer.low1, kmer.high2, kmer.low2);
- #else
- fprintf (stderr,"There is not the vertex for kmer %llx %llx.\n", kmer.high, kmer.low);
- #endif
- */
- }
-
- return vt_id;
- }
- else
- {
- vt_id = bisearch ( &vt_array[0], num_vt, bal_word );
-
- if ( vt_id >= 0 )
- {
- vt_id += num_vt;
- }
- else
- {
- fprintf ( stderr, "There is no vertex for kmer " );
- PrintKmer ( stderr, kmer );
- fprintf ( stderr, " .\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"There is not the vertex for kmer %llx %llx %llx %llx.\n", kmer.high1, kmer.low1, kmer.high2, kmer.low2);
- #else
- fprintf (stderr,"There is not the vertex for kmer %llx %llx.\n", kmer.high, kmer.low);
- #endif
- */
- }
-
- return vt_id;
- }
+ Kmer bal_word;
+ int vt_id;
+ bal_word = reverseComplement ( kmer, overlaplen );
+
+ if ( KmerSmaller ( kmer, bal_word ) )
+ {
+ vt_id = bisearch ( &vt_array[0], num_vt, kmer );
+
+ if ( vt_id < 0 )
+ {
+ fprintf ( stderr, "There is no vertex for kmer " );
+ PrintKmer ( stderr, kmer );
+ fprintf ( stderr, " .\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"There is not the vertex for kmer %llx %llx %llx %llx.\n", kmer.high1, kmer.low1, kmer.high2, kmer.low2);
+ #else
+ fprintf (stderr,"There is not the vertex for kmer %llx %llx.\n", kmer.high, kmer.low);
+ #endif
+ */
+ }
+
+ return vt_id;
+ }
+ else
+ {
+ vt_id = bisearch ( &vt_array[0], num_vt, bal_word );
+
+ if ( vt_id >= 0 )
+ {
+ vt_id += num_vt;
+ }
+ else
+ {
+ fprintf ( stderr, "There is no vertex for kmer " );
+ PrintKmer ( stderr, kmer );
+ fprintf ( stderr, " .\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"There is not the vertex for kmer %llx %llx %llx %llx.\n", kmer.high1, kmer.low1, kmer.high2, kmer.low2);
+ #else
+ fprintf (stderr,"There is not the vertex for kmer %llx %llx.\n", kmer.high, kmer.low);
+ #endif
+ */
+ }
+
+ return vt_id;
+ }
}
/*************************************************
@@ -273,98 +273,98 @@ Return:
#ifdef MER127
static void buildReverseComplementEdge ( unsigned int edgeno )
{
- int length = edge_array[edgeno].length;
- int i, index = 0;
- char * sequence, ch, *tightSeq;
- Kmer kmer = vt_array[edge_array[edgeno].from_vt].kmer;
- sequence = ( char * ) ckalloc ( ( overlaplen + length ) * sizeof ( char ) );
- int bit1, bit2, bit3, bit4;
-
- if ( overlaplen < 32 )
- {
- bit4 = overlaplen;
- bit3 = 0;
- bit2 = 0;
- bit1 = 0;
- }
-
- if ( overlaplen >= 32 && overlaplen < 64 )
- {
- bit4 = 32;
- bit3 = overlaplen - 32;
- bit2 = 0;
- bit1 = 0;
- }
-
- if ( overlaplen >= 64 && overlaplen < 96 )
- {
- bit4 = 32;
- bit3 = 32;
- bit2 = overlaplen - 64;
- bit1 = 0;
- }
-
- if ( overlaplen >= 96 && overlaplen < 128 )
- {
- bit4 = 32;
- bit3 = 32;
- bit2 = 32;
- bit1 = overlaplen - 96;
- }
-
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- ch = kmer.high1 & 0x3;
- kmer.high1 >>= 2;
- sequence[i] = ch;
- }
-
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- ch = kmer.low1 & 0x3;
- kmer.low1 >>= 2;
- sequence[i + bit1] = ch;
- }
-
- for ( i = bit3 - 1; i >= 0; i-- )
- {
- ch = kmer.high2 & 0x3;
- kmer.high2 >>= 2;
- sequence[i + bit1 + bit2] = ch;
- }
-
- for ( i = bit4 - 1; i >= 0; i-- )
- {
- ch = kmer.low2 & 0x3;
- kmer.low2 >>= 2;
- sequence[i + bit1 + bit2 + bit3] = ch;
- }
-
- for ( i = 0; i < length; i++ )
- {
- sequence[i + overlaplen] = getCharInTightString ( edge_array[edgeno].seq, i );
- }
-
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
-
- for ( i = length - 1; i >= 0; i-- )
- {
- writeChar2tightString ( int_comp ( sequence[i] ), tightSeq, index++ );
- }
-
- edge_array[edgeno + 1].length = length;
- edge_array[edgeno + 1].cvg = edge_array[edgeno].cvg;
- kmer = vt_array[edge_array[edgeno].from_vt].kmer;
- edge_array[edgeno + 1].to_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
- kmer = vt_array[edge_array[edgeno].to_vt].kmer;
- edge_array[edgeno + 1].from_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
- edge_array[edgeno + 1].seq = tightSeq;
- edge_array[edgeno + 1].bal_edge = 0;
- edge_array[edgeno + 1].rv = NULL;
- edge_array[edgeno + 1].arcs = NULL;
- edge_array[edgeno + 1].flag = 0;
- edge_array[edgeno + 1].deleted = 0;
- free ( ( void * ) sequence );
+ int length = edge_array[edgeno].length;
+ int i, index = 0;
+ char *sequence, ch, *tightSeq;
+ Kmer kmer = vt_array[edge_array[edgeno].from_vt].kmer;
+ sequence = ( char * ) ckalloc ( ( overlaplen + length ) * sizeof ( char ) );
+ int bit1, bit2, bit3, bit4;
+
+ if ( overlaplen < 32 )
+ {
+ bit4 = overlaplen;
+ bit3 = 0;
+ bit2 = 0;
+ bit1 = 0;
+ }
+
+ if ( overlaplen >= 32 && overlaplen < 64 )
+ {
+ bit4 = 32;
+ bit3 = overlaplen - 32;
+ bit2 = 0;
+ bit1 = 0;
+ }
+
+ if ( overlaplen >= 64 && overlaplen < 96 )
+ {
+ bit4 = 32;
+ bit3 = 32;
+ bit2 = overlaplen - 64;
+ bit1 = 0;
+ }
+
+ if ( overlaplen >= 96 && overlaplen < 128 )
+ {
+ bit4 = 32;
+ bit3 = 32;
+ bit2 = 32;
+ bit1 = overlaplen - 96;
+ }
+
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high1 & 0x3;
+ kmer.high1 >>= 2;
+ sequence[i] = ch;
+ }
+
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low1 & 0x3;
+ kmer.low1 >>= 2;
+ sequence[i + bit1] = ch;
+ }
+
+ for ( i = bit3 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high2 & 0x3;
+ kmer.high2 >>= 2;
+ sequence[i + bit1 + bit2] = ch;
+ }
+
+ for ( i = bit4 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low2 & 0x3;
+ kmer.low2 >>= 2;
+ sequence[i + bit1 + bit2 + bit3] = ch;
+ }
+
+ for ( i = 0; i < length; i++ )
+ {
+ sequence[i + overlaplen] = getCharInTightString ( edge_array[edgeno].seq, i );
+ }
+
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+
+ for ( i = length - 1; i >= 0; i-- )
+ {
+ writeChar2tightString ( int_comp ( sequence[i] ), tightSeq, index++ );
+ }
+
+ edge_array[edgeno + 1].length = length;
+ edge_array[edgeno + 1].cvg = edge_array[edgeno].cvg;
+ kmer = vt_array[edge_array[edgeno].from_vt].kmer;
+ edge_array[edgeno + 1].to_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
+ kmer = vt_array[edge_array[edgeno].to_vt].kmer;
+ edge_array[edgeno + 1].from_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
+ edge_array[edgeno + 1].seq = tightSeq;
+ edge_array[edgeno + 1].bal_edge = 0;
+ edge_array[edgeno + 1].rv = NULL;
+ edge_array[edgeno + 1].arcs = NULL;
+ edge_array[edgeno + 1].flag = 0;
+ edge_array[edgeno + 1].deleted = 0;
+ free ( ( void * ) sequence );
}
#else
@@ -382,53 +382,53 @@ Return:
*************************************************/
static void buildReverseComplementEdge ( unsigned int edgeno )
{
- int length = edge_array[edgeno].length;
- int i, index = 0;
- char * sequence, ch, *tightSeq;
- Kmer kmer = vt_array[edge_array[edgeno].from_vt].kmer;
- sequence = ( char * ) ckalloc ( ( overlaplen + length ) * sizeof ( char ) );
- int bit2 = overlaplen > 32 ? 32 : overlaplen;
- int bit1 = overlaplen > 32 ? overlaplen - 32 : 0;
-
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- ch = kmer.high & 0x3;
- kmer.high >>= 2;
- sequence[i] = ch;
- }
-
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- ch = kmer.low & 0x3;
- kmer.low >>= 2;
- sequence[i + bit1] = ch;
- }
-
- for ( i = 0; i < length; i++ )
- {
- sequence[i + overlaplen] = getCharInTightString ( edge_array[edgeno].seq, i );
- }
-
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
-
- for ( i = length - 1; i >= 0; i-- )
- {
- writeChar2tightString ( int_comp ( sequence[i] ), tightSeq, index++ );
- }
-
- edge_array[edgeno + 1].length = length;
- edge_array[edgeno + 1].cvg = edge_array[edgeno].cvg;
- kmer = vt_array[edge_array[edgeno].from_vt].kmer;
- edge_array[edgeno + 1].to_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
- kmer = vt_array[edge_array[edgeno].to_vt].kmer;
- edge_array[edgeno + 1].from_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
- edge_array[edgeno + 1].seq = tightSeq;
- edge_array[edgeno + 1].bal_edge = 0;
- edge_array[edgeno + 1].rv = NULL;
- edge_array[edgeno + 1].arcs = NULL;
- edge_array[edgeno + 1].flag = 0;
- edge_array[edgeno + 1].deleted = 0;
- free ( ( void * ) sequence );
+ int length = edge_array[edgeno].length;
+ int i, index = 0;
+ char *sequence, ch, *tightSeq;
+ Kmer kmer = vt_array[edge_array[edgeno].from_vt].kmer;
+ sequence = ( char * ) ckalloc ( ( overlaplen + length ) * sizeof ( char ) );
+ int bit2 = overlaplen > 32 ? 32 : overlaplen;
+ int bit1 = overlaplen > 32 ? overlaplen - 32 : 0;
+
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high & 0x3;
+ kmer.high >>= 2;
+ sequence[i] = ch;
+ }
+
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low & 0x3;
+ kmer.low >>= 2;
+ sequence[i + bit1] = ch;
+ }
+
+ for ( i = 0; i < length; i++ )
+ {
+ sequence[i + overlaplen] = getCharInTightString ( edge_array[edgeno].seq, i );
+ }
+
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+
+ for ( i = length - 1; i >= 0; i-- )
+ {
+ writeChar2tightString ( int_comp ( sequence[i] ), tightSeq, index++ );
+ }
+
+ edge_array[edgeno + 1].length = length;
+ edge_array[edgeno + 1].cvg = edge_array[edgeno].cvg;
+ kmer = vt_array[edge_array[edgeno].from_vt].kmer;
+ edge_array[edgeno + 1].to_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
+ kmer = vt_array[edge_array[edgeno].to_vt].kmer;
+ edge_array[edgeno + 1].from_vt = kmer2vt ( reverseComplement ( kmer, overlaplen ) );
+ edge_array[edgeno + 1].seq = tightSeq;
+ edge_array[edgeno + 1].bal_edge = 0;
+ edge_array[edgeno + 1].rv = NULL;
+ edge_array[edgeno + 1].arcs = NULL;
+ edge_array[edgeno + 1].flag = 0;
+ edge_array[edgeno + 1].deleted = 0;
+ free ( ( void * ) sequence );
}
#endif
@@ -445,122 +445,122 @@ Output:
Return:
None.
*************************************************/
-void loadEdge ( char * graphfile )
+void loadEdge ( char *graphfile )
{
- char c, name[256], line[1024], str[32];
- char * tightSeq = NULL;
- gzFile * fp;
- Kmer from_kmer, to_kmer;
- int n = 0, i, length, cvg, index = -1, bal_ed, edgeno;
- int linelen;
- unsigned int j;
- sprintf ( name, "%s.edge.gz", graphfile );
- fp = gzopen ( name, "r" );
- num_ed_limit = 1.2 * num_ed;
- edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
-
- for ( j = num_ed + 1; j <= num_ed_limit; j++ )
- {
- edge_array[j].seq = NULL;
- }
-
- while ( gzgets ( fp, line, sizeof ( line ) ) != NULL )
- {
- if ( line[0] == '>' )
- {
- if ( index >= 0 )
- {
- edgeno = index + 1;
- edge_array[edgeno].length = length;
- edge_array[edgeno].cvg = cvg;
- edge_array[edgeno].from_vt = kmer2vt ( from_kmer );
- edge_array[edgeno].to_vt = kmer2vt ( to_kmer );
- edge_array[edgeno].seq = tightSeq;
- edge_array[edgeno].bal_edge = bal_ed + 1;
- edge_array[edgeno].rv = NULL;
- edge_array[edgeno].arcs = NULL;
- edge_array[edgeno].flag = 0;
- edge_array[edgeno].deleted = 0;
-
- if ( bal_ed )
- {
- buildReverseComplementEdge ( edgeno );
- index++;
- }
- }
-
- n = 0;
- index++;
+ char c, name[256], line[1024], str[32];
+ char *tightSeq = NULL;
+ gzFile *fp;
+ Kmer from_kmer, to_kmer;
+ int n = 0, i, length, cvg, index = -1, bal_ed, edgeno;
+ int linelen;
+ unsigned int j;
+ sprintf ( name, "%s.edge.gz", graphfile );
+ fp = gzopen ( name, "r" );
+ num_ed_limit = 1.2 * num_ed;
+ edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+
+ for ( j = num_ed + 1; j <= num_ed_limit; j++ )
+ {
+ edge_array[j].seq = NULL;
+ }
+
+ while ( gzgets ( fp, line, sizeof ( line ) ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ if ( index >= 0 )
+ {
+ edgeno = index + 1;
+ edge_array[edgeno].length = length;
+ edge_array[edgeno].cvg = cvg;
+ edge_array[edgeno].from_vt = kmer2vt ( from_kmer );
+ edge_array[edgeno].to_vt = kmer2vt ( to_kmer );
+ edge_array[edgeno].seq = tightSeq;
+ edge_array[edgeno].bal_edge = bal_ed + 1;
+ edge_array[edgeno].rv = NULL;
+ edge_array[edgeno].arcs = NULL;
+ edge_array[edgeno].flag = 0;
+ edge_array[edgeno].deleted = 0;
+
+ if ( bal_ed )
+ {
+ buildReverseComplementEdge ( edgeno );
+ index++;
+ }
+ }
+
+ n = 0;
+ index++;
#ifdef MER127
- sscanf ( line + 7, "%d,%llx %llx %llx %llx,%llx %llx %llx %llx,%s %d,%d",
- &length, & ( from_kmer.high1 ), & ( from_kmer.low1 ), & ( from_kmer.high2 ), & ( from_kmer.low2 ), & ( to_kmer.high1 ), & ( to_kmer.low1 ),
- & ( to_kmer.high2 ), & ( to_kmer.low2 ), str, &cvg, &bal_ed );
+ sscanf ( line + 7, "%d,%llx %llx %llx %llx,%llx %llx %llx %llx,%s %d,%d",
+ &length, & ( from_kmer.high1 ), & ( from_kmer.low1 ), & ( from_kmer.high2 ), & ( from_kmer.low2 ), & ( to_kmer.high1 ), & ( to_kmer.low1 ),
+ & ( to_kmer.high2 ), & ( to_kmer.low2 ), str, &cvg, &bal_ed );
#else
- sscanf ( line + 7, "%d,%llx %llx,%llx %llx,%s %d,%d", &length, & ( from_kmer.high ), & ( from_kmer.low ), & ( to_kmer.high ), & ( to_kmer.low ), str, &cvg, &bal_ed );
+ sscanf ( line + 7, "%d,%llx %llx,%llx %llx,%s %d,%d", &length, & ( from_kmer.high ), & ( from_kmer.low ), & ( to_kmer.high ), & ( to_kmer.low ), str, &cvg, &bal_ed );
#endif
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
- }
- else
- {
- linelen = strlen ( line );
-
- for ( i = 0; i < linelen; i++ )
- {
- if ( line[i] >= 'a' && line[i] <= 'z' )
- {
- c = base2int ( line[i] - 'a' + 'A' );
- writeChar2tightString ( c, tightSeq, n++ );
- }
- else if ( line[i] >= 'A' && line[i] <= 'Z' )
- {
- c = base2int ( line[i] );
- writeChar2tightString ( c, tightSeq, n++ );
- }
- }
- }
- }
-
- if ( index >= 0 )
- {
- edgeno = index + 1;
- edge_array[edgeno].length = length;
- edge_array[edgeno].cvg = cvg;
- edge_array[edgeno].from_vt = kmer2vt ( from_kmer );
- edge_array[edgeno].to_vt = kmer2vt ( to_kmer );
- edge_array[edgeno].seq = tightSeq;
- edge_array[edgeno].bal_edge = bal_ed + 1;
-
- if ( bal_ed )
- {
- buildReverseComplementEdge ( edgeno );
- index++;
- }
- }
-
- fprintf ( stderr, "%d edge(s) input.\n", index + 1 );
- gzclose ( fp );
- createArcMemo ();
- loadPreArcs ( graphfile );
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+ }
+ else
+ {
+ linelen = strlen ( line );
+
+ for ( i = 0; i < linelen; i++ )
+ {
+ if ( line[i] >= 'a' && line[i] <= 'z' )
+ {
+ c = base2int ( line[i] - 'a' + 'A' );
+ writeChar2tightString ( c, tightSeq, n++ );
+ }
+ else if ( line[i] >= 'A' && line[i] <= 'Z' )
+ {
+ c = base2int ( line[i] );
+ writeChar2tightString ( c, tightSeq, n++ );
+ }
+ }
+ }
+ }
+
+ if ( index >= 0 )
+ {
+ edgeno = index + 1;
+ edge_array[edgeno].length = length;
+ edge_array[edgeno].cvg = cvg;
+ edge_array[edgeno].from_vt = kmer2vt ( from_kmer );
+ edge_array[edgeno].to_vt = kmer2vt ( to_kmer );
+ edge_array[edgeno].seq = tightSeq;
+ edge_array[edgeno].bal_edge = bal_ed + 1;
+
+ if ( bal_ed )
+ {
+ buildReverseComplementEdge ( edgeno );
+ index++;
+ }
+ }
+
+ fprintf ( stderr, "%d edge(s) input.\n", index + 1 );
+ gzclose ( fp );
+ createArcMemo ();
+ loadPreArcs ( graphfile );
}
unsigned int getTwinEdge ( unsigned int edgeno )
{
- return edgeno + edge_array[edgeno].bal_edge - 1;
+ return edgeno + edge_array[edgeno].bal_edge - 1;
}
boolean EdSmallerThanTwin ( unsigned int edgeno )
{
- return edge_array[edgeno].bal_edge > 1;
+ return edge_array[edgeno].bal_edge > 1;
}
boolean EdLargerThanTwin ( unsigned int edgeno )
{
- return edge_array[edgeno].bal_edge < 1;
+ return edge_array[edgeno].bal_edge < 1;
}
boolean EdSameAsTwin ( unsigned int edgeno )
{
- return edge_array[edgeno].bal_edge == 1;
+ return edge_array[edgeno].bal_edge == 1;
}
/*************************************************
@@ -579,68 +579,68 @@ Return:
*************************************************/
static void add1Arc ( unsigned int from_ed, unsigned int to_ed, unsigned int weight )
{
- if ( edge_array[from_ed].to_vt != edge_array[to_ed].from_vt )
- {
- //fprintf(stderr,"add1Arc: inconsistant joins\n");
- return;
- }
-
- unsigned int bal_fe = getTwinEdge ( from_ed );
- unsigned int bal_te = getTwinEdge ( to_ed );
-
- if ( from_ed > num_ed || to_ed > num_ed || bal_fe > num_ed || bal_te > num_ed )
- {
- return;
- }
-
- ARC * parc, *bal_parc;
- //both arcs already exist
- parc = getArcBetween ( from_ed, to_ed );
-
- if ( parc )
- {
- bal_parc = parc->bal_arc;
- parc->multiplicity += weight;
- bal_parc->multiplicity += weight;
- return;
- }
-
- //create new arcs
- parc = allocateArc ( to_ed );
- parc->multiplicity = weight;
- parc->prev = NULL;
-
- if ( edge_array[from_ed].arcs )
- {
- edge_array[from_ed].arcs->prev = parc;
- }
-
- parc->next = edge_array[from_ed].arcs;
- edge_array[from_ed].arcs = parc;
-
- // A->A'
- if ( bal_te == from_ed )
- {
- //printf("preArc from A to A'\n");
- parc->bal_arc = parc;
- parc->multiplicity += weight;
- return;
- }
-
- bal_parc = allocateArc ( bal_fe );
- bal_parc->multiplicity = weight;
- bal_parc->prev = NULL;
-
- if ( edge_array[bal_te].arcs )
- {
- edge_array[bal_te].arcs->prev = bal_parc;
- }
-
- bal_parc->next = edge_array[bal_te].arcs;
- edge_array[bal_te].arcs = bal_parc;
- //link them to each other
- parc->bal_arc = bal_parc;
- bal_parc->bal_arc = parc;
+ if ( edge_array[from_ed].to_vt != edge_array[to_ed].from_vt )
+ {
+ //fprintf(stderr,"add1Arc: inconsistant joins\n");
+ return;
+ }
+
+ unsigned int bal_fe = getTwinEdge ( from_ed );
+ unsigned int bal_te = getTwinEdge ( to_ed );
+
+ if ( from_ed > num_ed || to_ed > num_ed || bal_fe > num_ed || bal_te > num_ed )
+ {
+ return;
+ }
+
+ ARC *parc, *bal_parc;
+ //both arcs already exist
+ parc = getArcBetween ( from_ed, to_ed );
+
+ if ( parc )
+ {
+ bal_parc = parc->bal_arc;
+ parc->multiplicity += weight;
+ bal_parc->multiplicity += weight;
+ return;
+ }
+
+ //create new arcs
+ parc = allocateArc ( to_ed );
+ parc->multiplicity = weight;
+ parc->prev = NULL;
+
+ if ( edge_array[from_ed].arcs )
+ {
+ edge_array[from_ed].arcs->prev = parc;
+ }
+
+ parc->next = edge_array[from_ed].arcs;
+ edge_array[from_ed].arcs = parc;
+
+ // A->A'
+ if ( bal_te == from_ed )
+ {
+ //printf("preArc from A to A'\n");
+ parc->bal_arc = parc;
+ parc->multiplicity += weight;
+ return;
+ }
+
+ bal_parc = allocateArc ( bal_fe );
+ bal_parc->multiplicity = weight;
+ bal_parc->prev = NULL;
+
+ if ( edge_array[bal_te].arcs )
+ {
+ edge_array[bal_te].arcs->prev = bal_parc;
+ }
+
+ bal_parc->next = edge_array[bal_te].arcs;
+ edge_array[bal_te].arcs = bal_parc;
+ //link them to each other
+ parc->bal_arc = bal_parc;
+ bal_parc->bal_arc = parc;
}
/*************************************************
@@ -655,44 +655,44 @@ Output:
Return:
None.
*************************************************/
-void loadPreArcs ( char * graphfile )
+void loadPreArcs ( char *graphfile )
{
- FILE * fp;
- char name[256], line[1024];
- unsigned int target, weight;
- unsigned int from_ed;
- char * seg;
- sprintf ( name, "%s.preArc", graphfile );
- fp = ckopen ( name, "r" );
- arcCounter = 0;
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- seg = strtok ( line, " " );
- from_ed = atoi ( seg );
-
- while ( ( seg = strtok ( NULL, " " ) ) != NULL )
- {
- target = atoi ( seg );
- seg = strtok ( NULL, " " );
- weight = atoi ( seg );
- add1Arc ( from_ed, target, weight );
- }
- }
-
- fprintf ( stderr, "%lli pre-arcs loaded.\n", arcCounter );
- fclose ( fp );
+ FILE *fp;
+ char name[256], line[1024];
+ unsigned int target, weight;
+ unsigned int from_ed;
+ char *seg;
+ sprintf ( name, "%s.preArc", graphfile );
+ fp = ckopen ( name, "r" );
+ arcCounter = 0;
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ seg = strtok ( line, " " );
+ from_ed = atoi ( seg );
+
+ while ( ( seg = strtok ( NULL, " " ) ) != NULL )
+ {
+ target = atoi ( seg );
+ seg = strtok ( NULL, " " );
+ weight = atoi ( seg );
+ add1Arc ( from_ed, target, weight );
+ }
+ }
+
+ fprintf ( stderr, "%lli pre-arcs loaded.\n", arcCounter );
+ fclose ( fp );
}
-void free_edge_array ( EDGE * ed_array, int ed_num )
+void free_edge_array ( EDGE *ed_array, int ed_num )
{
- int i;
+ int i;
- for ( i = 1; i <= ed_num; i++ )
- if ( ed_array[i].seq )
- {
- free ( ( void * ) ed_array[i].seq );
- }
+ for ( i = 1; i <= ed_num; i++ )
+ if ( ed_array[i].seq )
+ {
+ free ( ( void * ) ed_array[i].seq );
+ }
- free ( ( void * ) ed_array );
+ free ( ( void * ) ed_array );
}
diff --git a/standardPregraph/localAsm.c b/standardPregraph/localAsm.c
index e61d9e4..9133db3 100644
--- a/standardPregraph/localAsm.c
+++ b/standardPregraph/localAsm.c
@@ -1,7 +1,7 @@
/*
* localAsm.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -30,1365 +30,1368 @@
#define UPlimit 5000
#define MaxRouteNum 10
-static void kmerSet_mark ( KmerSet * set );
-static void trace4Repeat ( Kmer currW, int steps, int min, int max, int * num_route, KmerSet * kset,
- Kmer kmerDest, int overlap, Kmer WORDF, int * traceCounter, int maxRoute,
- kmer_t ** soFarNode, short * multiOccu1, short * multiOccu2, int * routeLens,
- char ** foundRoutes, char * soFarSeq, long long * soFarLinks, double * avgLinks );
+static void kmerSet_mark ( KmerSet *set );
+static void trace4Repeat ( Kmer currW, int steps, int min, int max, int *num_route, KmerSet *kset,
+ Kmer kmerDest, int overlap, Kmer WORDF, int *traceCounter, int maxRoute,
+ kmer_t **soFarNode, short *multiOccu1, short *multiOccu2, int *routeLens,
+ char **foundRoutes, char *soFarSeq, long long *soFarLinks, double *avgLinks );
#ifdef MER127
static Kmer prevKmerLocal ( Kmer next, char ch, int overlap )
{
- Kmer word = KmerRightBitMoveBy2 ( next );
+ Kmer word = KmerRightBitMoveBy2 ( next );
- if ( 2 * ( overlap - 1 ) < 64 )
- {
- word.low2 |= ( ( ( ubyte8 ) ch ) << 2 * ( overlap - 1 ) );
- }
+ if ( 2 * ( overlap - 1 ) < 64 )
+ {
+ word.low2 |= ( ( ( ubyte8 ) ch ) << 2 * ( overlap - 1 ) );
+ }
- if ( 2 * ( overlap - 1 ) >= 64 && 2 * ( overlap - 1 ) < 128 )
- {
- word.high2 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 64 );
- }
+ if ( 2 * ( overlap - 1 ) >= 64 && 2 * ( overlap - 1 ) < 128 )
+ {
+ word.high2 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 64 );
+ }
- if ( 2 * ( overlap - 1 ) >= 128 && 2 * ( overlap - 1 ) < 192 )
- {
- word.low1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 128 );
- }
+ if ( 2 * ( overlap - 1 ) >= 128 && 2 * ( overlap - 1 ) < 192 )
+ {
+ word.low1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 128 );
+ }
- if ( 2 * ( overlap - 1 ) >= 192 && 2 * ( overlap - 1 ) < 256 )
- {
- word.high1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 192 );
- }
+ if ( 2 * ( overlap - 1 ) >= 192 && 2 * ( overlap - 1 ) < 256 )
+ {
+ word.high1 |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 192 );
+ }
- return word;
+ return word;
}
static Kmer nextKmerLocal ( Kmer prev, char ch, Kmer WordFilter )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word = KmerAnd ( word, WordFilter );
- word.low2 |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word = KmerAnd ( word, WordFilter );
+ word.low2 |= ch;
+ return word;
}
#else
static Kmer prevKmerLocal ( Kmer next, char ch, int overlap )
{
- Kmer word = KmerRightBitMoveBy2 ( next );
-
- if ( 2 * ( overlap - 1 ) < 64 )
- {
- word.low |= ( ( ( ubyte8 ) ch ) << 2 * ( overlap - 1 ) );
- }
- else
- {
- word.high |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 64 );
- }
-
- return word;
+ Kmer word = KmerRightBitMoveBy2 ( next );
+
+ if ( 2 * ( overlap - 1 ) < 64 )
+ {
+ word.low |= ( ( ( ubyte8 ) ch ) << 2 * ( overlap - 1 ) );
+ }
+ else
+ {
+ word.high |= ( ( ubyte8 ) ch ) << ( 2 * ( overlap - 1 ) - 64 );
+ }
+
+ return word;
}
static Kmer nextKmerLocal ( Kmer prev, char ch, Kmer WordFilter )
{
- Kmer word = KmerLeftBitMoveBy2 ( prev );
- word = KmerAnd ( word, WordFilter );
- word.low |= ch;
- return word;
+ Kmer word = KmerLeftBitMoveBy2 ( prev );
+ word = KmerAnd ( word, WordFilter );
+ word.low |= ch;
+ return word;
}
#endif
-static void singleKmer ( int t, KmerSet * kset, int flag, Kmer * kmerBuffer, char * prevcBuffer, char * nextcBuffer )
+static void singleKmer ( int t, KmerSet *kset, int flag, Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer )
{
- kmer_t * pos;
- put_kmerset ( kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &pos );
-
- if ( pos->inEdge == flag )
- {
- return;
- }
- else if ( pos->inEdge == 0 )
- {
- pos->inEdge = flag;
- }
- else if ( pos->inEdge == 1 && flag == 2 )
- {
- pos->inEdge = 3;
- }
- else if ( pos->inEdge == 2 && flag == 1 )
- {
- pos->inEdge = 3;
- }
+ kmer_t *pos;
+ put_kmerset ( kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &pos );
+
+ if ( pos->inEdge == flag )
+ {
+ return;
+ }
+ else if ( pos->inEdge == 0 )
+ {
+ pos->inEdge = flag;
+ }
+ else if ( pos->inEdge == 1 && flag == 2 )
+ {
+ pos->inEdge = 3;
+ }
+ else if ( pos->inEdge == 2 && flag == 1 )
+ {
+ pos->inEdge = 3;
+ }
}
-static void putKmer2DBgraph ( KmerSet * kset, int flag, int kmer_c, Kmer * kmerBuffer, char * prevcBuffer, char * nextcBuffer )
+static void putKmer2DBgraph ( KmerSet *kset, int flag, int kmer_c, Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer )
{
- int t;
+ int t;
- for ( t = 0; t < kmer_c; t++ )
- {
- singleKmer ( t, kset, flag, kmerBuffer, prevcBuffer, nextcBuffer );
- }
+ for ( t = 0; t < kmer_c; t++ )
+ {
+ singleKmer ( t, kset, flag, kmerBuffer, prevcBuffer, nextcBuffer );
+ }
}
-static void getSeqFromRead ( READNEARBY read, char * src_seq )
+static void getSeqFromRead ( READNEARBY read, char *src_seq )
{
- int len_seq = read.len;
- int j;
- char * tightSeq = ( char * ) darrayGet ( readSeqInGap, read.seqStarter );
-
- for ( j = 0; j < len_seq; j++ )
- {
- src_seq[j] = getCharInTightString ( tightSeq, j );
- }
+ int len_seq = read.len;
+ int j;
+ char *tightSeq = ( char * ) darrayGet ( readSeqInGap, read.seqStarter );
+
+ for ( j = 0; j < len_seq; j++ )
+ {
+ src_seq[j] = getCharInTightString ( tightSeq, j );
+ }
}
#ifdef MER127
-static void chopKmer4Ctg ( Kmer * kmerCtg, int lenCtg, int overlap, char * src_seq, Kmer WORDF )
+static void chopKmer4Ctg ( Kmer *kmerCtg, int lenCtg, int overlap, char *src_seq, Kmer WORDF )
{
- int index, j;
- Kmer word;
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
-
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
-
- index = 0;
- kmerCtg[index++] = word;
-
- for ( j = 1; j <= lenCtg - overlap; j++ )
- {
- word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
- kmerCtg[index++] = word;
- }
+ int index, j;
+ Kmer word;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
+
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
+
+ index = 0;
+ kmerCtg[index++] = word;
+
+ for ( j = 1; j <= lenCtg - overlap; j++ )
+ {
+ word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
+ kmerCtg[index++] = word;
+ }
}
#else
-static void chopKmer4Ctg ( Kmer * kmerCtg, int lenCtg, int overlap, char * src_seq, Kmer WORDF )
+static void chopKmer4Ctg ( Kmer *kmerCtg, int lenCtg, int overlap, char *src_seq, Kmer WORDF )
{
- int index, j;
- Kmer word;
- word.high = word.low = 0;
-
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
-
- index = 0;
- kmerCtg[index++] = word;
-
- for ( j = 1; j <= lenCtg - overlap; j++ )
- {
- word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
- kmerCtg[index++] = word;
- }
+ int index, j;
+ Kmer word;
+ word.high = word.low = 0;
+
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
+
+ index = 0;
+ kmerCtg[index++] = word;
+
+ for ( j = 1; j <= lenCtg - overlap; j++ )
+ {
+ word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
+ kmerCtg[index++] = word;
+ }
}
#endif
-static void chopKmer4read ( int len_seq, int overlap, char * src_seq, char * bal_seq, Kmer * kmerBuffer, char * prevcBuffer, char * nextcBuffer, int * kmer_c, Kmer WORDF )
+static void chopKmer4read ( int len_seq, int overlap, char *src_seq, char *bal_seq, Kmer *kmerBuffer, char *prevcBuffer, char *nextcBuffer, int *kmer_c, Kmer WORDF )
{
- int j, bal_j;
- Kmer word, bal_word;
- int index;
- char InvalidCh = 4;
-
- if ( len_seq < overlap + 1 )
- {
- *kmer_c = 0;
- return;
- }
+ int j, bal_j;
+ Kmer word, bal_word;
+ int index;
+ char InvalidCh = 4;
+
+ if ( len_seq < overlap + 1 )
+ {
+ *kmer_c = 0;
+ return;
+ }
#ifdef MER127
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word.high = word.low = 0;
+ word.high = word.low = 0;
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlap );
- bal_j = len_seq - 0 - overlap; // 0;
- index = 0;
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- kmerBuffer[index] = word;
- prevcBuffer[index] = InvalidCh;
- nextcBuffer[index++] = src_seq[0 + overlap];
- }
- else
- {
- kmerBuffer[index] = bal_word;
- prevcBuffer[index] = bal_seq[bal_j - 1];
- nextcBuffer[index++] = InvalidCh;
- }
-
- for ( j = 1; j <= len_seq - overlap; j++ )
- {
- word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
- bal_j = len_seq - j - overlap; // j;
- bal_word = prevKmerLocal ( bal_word, bal_seq[bal_j], overlap );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- kmerBuffer[index] = word;
- prevcBuffer[index] = src_seq[j - 1];
-
- if ( j < len_seq - overlap )
- {
- nextcBuffer[index++] = src_seq[j + overlap];
- }
- else
- {
- nextcBuffer[index++] = InvalidCh;
- }
-
- //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
- }
- else
- {
- // complementary node
- kmerBuffer[index] = bal_word;
-
- if ( bal_j > 0 )
- {
- prevcBuffer[index] = bal_seq[bal_j - 1];
- }
- else
- {
- prevcBuffer[index] = InvalidCh;
- }
-
- nextcBuffer[index++] = bal_seq[bal_j + overlap];
- //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
- }
- }
-
- *kmer_c = index;
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlap );
+ bal_j = len_seq - 0 - overlap; // 0;
+ index = 0;
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = InvalidCh;
+ nextcBuffer[index++] = src_seq[0 + overlap];
+ }
+ else
+ {
+ kmerBuffer[index] = bal_word;
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ nextcBuffer[index++] = InvalidCh;
+ }
+
+ for ( j = 1; j <= len_seq - overlap; j++ )
+ {
+ word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
+ bal_j = len_seq - j - overlap; // j;
+ bal_word = prevKmerLocal ( bal_word, bal_seq[bal_j], overlap );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = src_seq[j - 1];
+
+ if ( j < len_seq - overlap )
+ {
+ nextcBuffer[index++] = src_seq[j + overlap];
+ }
+ else
+ {
+ nextcBuffer[index++] = InvalidCh;
+ }
+
+ //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
+ }
+ else
+ {
+ // complementary node
+ kmerBuffer[index] = bal_word;
+
+ if ( bal_j > 0 )
+ {
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ }
+ else
+ {
+ prevcBuffer[index] = InvalidCh;
+ }
+
+ nextcBuffer[index++] = bal_seq[bal_j + overlap];
+ //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
+ }
+ }
+
+ *kmer_c = index;
}
-static void headTightStr ( char * tightStr, int length, int start, int headLen, int revS, char * src_seq )
+static void headTightStr ( char *tightStr, int length, int start, int headLen, int revS, char *src_seq )
{
- int i, index = 0;
-
- if ( !revS )
- {
- for ( i = start; i < start + headLen; i++ )
- {
- src_seq[index++] = getCharInTightString ( tightStr, i );
- }
- }
- else
- {
- for ( i = length - 1 - start; i >= length - headLen - start; i-- )
- {
- src_seq[index++] = int_comp ( getCharInTightString ( tightStr, i ) );
- }
- }
+ int i, index = 0;
+
+ if ( !revS )
+ {
+ for ( i = start; i < start + headLen; i++ )
+ {
+ src_seq[index++] = getCharInTightString ( tightStr, i );
+ }
+ }
+ else
+ {
+ for ( i = length - 1 - start; i >= length - headLen - start; i-- )
+ {
+ src_seq[index++] = int_comp ( getCharInTightString ( tightStr, i ) );
+ }
+ }
}
-static int getSeqFromCtg ( CTGinSCAF * ctg, boolean fromHead, unsigned int len, int originOverlap, char * src_seq )
+static int getSeqFromCtg ( CTGinSCAF *ctg, boolean fromHead, unsigned int len, int originOverlap, char *src_seq )
{
- unsigned int ctgId = ctg->ctgID;
- unsigned int bal_ctg = getTwinCtg ( ctgId );
-
- if ( contig_array[ctgId].length < 1 )
- {
- return 0;
- }
-
- unsigned int length = contig_array[ctgId].length + originOverlap;
- len = len < length ? len : length;
-
- if ( fromHead )
- {
- if ( contig_array[ctgId].seq )
- {
- headTightStr ( contig_array[ctgId].seq, length, 0, len, 0, src_seq );
- }
- else
- {
- headTightStr ( contig_array[bal_ctg].seq, length, 0, len, 1, src_seq );
- }
- }
- else
- {
- if ( contig_array[ctgId].seq )
- {
- headTightStr ( contig_array[ctgId].seq, length, length - len, len, 0, src_seq );
- }
- else
- {
- headTightStr ( contig_array[bal_ctg].seq, length, length - len, len, 1, src_seq );
- }
- }
-
- return len;
+ unsigned int ctgId = ctg->ctgID;
+ unsigned int bal_ctg = getTwinCtg ( ctgId );
+
+ if ( contig_array[ctgId].length < 1 )
+ {
+ return 0;
+ }
+
+ unsigned int length = contig_array[ctgId].length + originOverlap;
+ len = len < length ? len : length;
+
+ if ( fromHead )
+ {
+ if ( contig_array[ctgId].seq )
+ {
+ headTightStr ( contig_array[ctgId].seq, length, 0, len, 0, src_seq );
+ }
+ else
+ {
+ headTightStr ( contig_array[bal_ctg].seq, length, 0, len, 1, src_seq );
+ }
+ }
+ else
+ {
+ if ( contig_array[ctgId].seq )
+ {
+ headTightStr ( contig_array[ctgId].seq, length, length - len, len, 0, src_seq );
+ }
+ else
+ {
+ headTightStr ( contig_array[bal_ctg].seq, length, length - len, len, 1, src_seq );
+ }
+ }
+
+ return len;
}
-static KmerSet * readsInGap2DBgraph ( READNEARBY * rdArray, int num, CTGinSCAF * ctg1, CTGinSCAF * ctg2, int originOverlap, Kmer * kmerCtg1, Kmer * kmerCtg2, int overlap, Kmer WordFilter )
+static KmerSet *readsInGap2DBgraph ( READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2, int originOverlap, Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap, Kmer WordFilter )
{
- int kmer_c;
- Kmer * kmerBuffer;
- char * nextcBuffer, *prevcBuffer;
- int i;
- int buffer_size = maxReadLen > CTGendLen ? maxReadLen : CTGendLen;
- KmerSet * kmerS = NULL;
- int lenCtg1;
- int lenCtg2;
- char * bal_seq;
- char * src_seq;
- src_seq = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- bal_seq = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- lenCtg1 = getSeqFromCtg ( ctg1, 0, CTGendLen, originOverlap, src_seq );
- lenCtg2 = getSeqFromCtg ( ctg2, 1, CTGendLen, originOverlap, src_seq );
-
- if ( lenCtg1 <= overlap || lenCtg2 <= overlap )
- {
- free ( ( void * ) src_seq );
- free ( ( void * ) bal_seq );
- return kmerS;
- }
-
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- kmerS = init_kmerset ( 1024, 0.77f );
-
- for ( i = 0; i < num; i++ )
- {
- getSeqFromRead ( rdArray[i], src_seq );
- chopKmer4read ( rdArray[i].len, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
- putKmer2DBgraph ( kmerS, 0, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
- }
-
- lenCtg1 = getSeqFromCtg ( ctg1, 0, CTGendLen, originOverlap, src_seq );
- chopKmer4Ctg ( kmerCtg1, lenCtg1, overlap, src_seq, WordFilter );
- chopKmer4read ( lenCtg1, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
- putKmer2DBgraph ( kmerS, 1, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
- lenCtg2 = getSeqFromCtg ( ctg2, 1, CTGendLen, originOverlap, src_seq );
- chopKmer4Ctg ( kmerCtg2, lenCtg2, overlap, src_seq, WordFilter );
- chopKmer4read ( lenCtg2, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
- putKmer2DBgraph ( kmerS, 2, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
- /*
- if(ctg1->ctgID==3733&&ctg2->ctgID==3067){
- for(i=0;i<lenCtg2;i++)
- printf("%c",int2base((int)src_seq[i]));
- printf("\n");
- }
- */
- //printf("sequence length chop from contigs on both sides: %d %d\n",lenCtg1,lenCtg2);
- //kmerSet_deLoop(kmerS,WordFilter);
- kmerSet_mark ( kmerS );
- free ( ( void * ) src_seq );
- free ( ( void * ) bal_seq );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) nextcBuffer );
- free ( ( void * ) prevcBuffer );
- return kmerS;
+ int kmer_c;
+ Kmer *kmerBuffer;
+ char *nextcBuffer, *prevcBuffer;
+ int i;
+ int buffer_size = maxReadLen > CTGendLen ? maxReadLen : CTGendLen;
+ KmerSet *kmerS = NULL;
+ int lenCtg1;
+ int lenCtg2;
+ char *bal_seq;
+ char *src_seq;
+ src_seq = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ bal_seq = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ lenCtg1 = getSeqFromCtg ( ctg1, 0, CTGendLen, originOverlap, src_seq );
+ lenCtg2 = getSeqFromCtg ( ctg2, 1, CTGendLen, originOverlap, src_seq );
+
+ if ( lenCtg1 <= overlap || lenCtg2 <= overlap )
+ {
+ free ( ( void * ) src_seq );
+ free ( ( void * ) bal_seq );
+ return kmerS;
+ }
+
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ kmerS = init_kmerset ( 1024, 0.77f );
+
+ for ( i = 0; i < num; i++ )
+ {
+ getSeqFromRead ( rdArray[i], src_seq );
+ chopKmer4read ( rdArray[i].len, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
+ putKmer2DBgraph ( kmerS, 0, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
+ }
+
+ lenCtg1 = getSeqFromCtg ( ctg1, 0, CTGendLen, originOverlap, src_seq );
+ chopKmer4Ctg ( kmerCtg1, lenCtg1, overlap, src_seq, WordFilter );
+ chopKmer4read ( lenCtg1, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
+ putKmer2DBgraph ( kmerS, 1, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
+ lenCtg2 = getSeqFromCtg ( ctg2, 1, CTGendLen, originOverlap, src_seq );
+ chopKmer4Ctg ( kmerCtg2, lenCtg2, overlap, src_seq, WordFilter );
+ chopKmer4read ( lenCtg2, overlap, src_seq, bal_seq, kmerBuffer, prevcBuffer, nextcBuffer, &kmer_c, WordFilter );
+ putKmer2DBgraph ( kmerS, 2, kmer_c, kmerBuffer, prevcBuffer, nextcBuffer );
+ /*
+ if(ctg1->ctgID==3733&&ctg2->ctgID==3067){
+ for(i=0;i<lenCtg2;i++)
+ printf("%c",int2base((int)src_seq[i]));
+ printf("\n");
+ }
+ */
+ //printf("sequence length chop from contigs on both sides: %d %d\n",lenCtg1,lenCtg2);
+ //kmerSet_deLoop(kmerS,WordFilter);
+ kmerSet_mark ( kmerS );
+ free ( ( void * ) src_seq );
+ free ( ( void * ) bal_seq );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) nextcBuffer );
+ free ( ( void * ) prevcBuffer );
+ return kmerS;
}
#ifdef MER127
-static void printKmerSeqLocal ( FILE * fp, Kmer kmer, int overlap )
+static void printKmerSeqLocal ( FILE *fp, Kmer kmer, int overlap )
{
- int i, bit1, bit2, bit3, bit4;
- char ch;
- char kmerSeq[128];
-
- if ( overlap < 32 )
- {
- bit4 = overlap;
- bit3 = 0;
- bit2 = 0;
- bit1 = 0;
- }
-
- if ( overlap >= 32 && overlap < 64 )
- {
- bit4 = 32;
- bit3 = overlap - 32;
- bit2 = 0;
- bit1 = 0;
- }
-
- if ( overlap >= 64 && overlap < 96 )
- {
- bit4 = 32;
- bit3 = 32;
- bit2 = overlap - 64;
- bit1 = 0;
- }
-
- if ( overlap >= 96 && overlap < 128 )
- {
- bit4 = 32;
- bit3 = 32;
- bit2 = 32;
- bit1 = overlap - 96;
- }
-
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- ch = kmer.high1 & 0x3;
- kmer.high1 >>= 2;
- kmerSeq[i] = ch;
- }
-
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- ch = kmer.low1 & 0x3;
- kmer.low1 >>= 2;
- kmerSeq[i + bit1] = ch;
- }
-
- for ( i = bit3 - 1; i >= 0; i-- )
- {
- ch = kmer.high2 & 0x3;
- kmer.high2 >>= 2;
- kmerSeq[i + bit1 + bit2] = ch;
- }
-
- for ( i = bit4 - 1; i >= 0; i-- )
- {
- ch = kmer.low2 & 0x3;
- kmer.low2 >>= 2;
- kmerSeq[i + bit1 + bit2 + bit3] = ch;
- }
-
- for ( i = 0; i < overlap; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
- }
+ int i, bit1, bit2, bit3, bit4;
+ char ch;
+ char kmerSeq[128];
+
+ if ( overlap < 32 )
+ {
+ bit4 = overlap;
+ bit3 = 0;
+ bit2 = 0;
+ bit1 = 0;
+ }
+
+ if ( overlap >= 32 && overlap < 64 )
+ {
+ bit4 = 32;
+ bit3 = overlap - 32;
+ bit2 = 0;
+ bit1 = 0;
+ }
+
+ if ( overlap >= 64 && overlap < 96 )
+ {
+ bit4 = 32;
+ bit3 = 32;
+ bit2 = overlap - 64;
+ bit1 = 0;
+ }
+
+ if ( overlap >= 96 && overlap < 128 )
+ {
+ bit4 = 32;
+ bit3 = 32;
+ bit2 = 32;
+ bit1 = overlap - 96;
+ }
+
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high1 & 0x3;
+ kmer.high1 >>= 2;
+ kmerSeq[i] = ch;
+ }
+
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low1 & 0x3;
+ kmer.low1 >>= 2;
+ kmerSeq[i + bit1] = ch;
+ }
+
+ for ( i = bit3 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high2 & 0x3;
+ kmer.high2 >>= 2;
+ kmerSeq[i + bit1 + bit2] = ch;
+ }
+
+ for ( i = bit4 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low2 & 0x3;
+ kmer.low2 >>= 2;
+ kmerSeq[i + bit1 + bit2 + bit3] = ch;
+ }
+
+ for ( i = 0; i < overlap; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
+ }
}
#else
-static void printKmerSeqLocal ( FILE * fp, Kmer kmer, int overlap )
+static void printKmerSeqLocal ( FILE *fp, Kmer kmer, int overlap )
{
- int i, bit1, bit2;
- char ch;
- char kmerSeq[64];
- bit2 = overlap > 32 ? 32 : overlap;
- bit1 = overlap > 32 ? overlap - 32 : 0;
-
- for ( i = bit1 - 1; i >= 0; i-- )
- {
- ch = kmer.high & 0x3;
- kmer.high >>= 2;
- kmerSeq[i] = ch;
- }
-
- for ( i = bit2 - 1; i >= 0; i-- )
- {
- ch = kmer.low & 0x3;
- kmer.low >>= 2;
- kmerSeq[i + bit1] = ch;
- }
-
- for ( i = 0; i < overlap; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
- }
+ int i, bit1, bit2;
+ char ch;
+ char kmerSeq[64];
+ bit2 = overlap > 32 ? 32 : overlap;
+ bit1 = overlap > 32 ? overlap - 32 : 0;
+
+ for ( i = bit1 - 1; i >= 0; i-- )
+ {
+ ch = kmer.high & 0x3;
+ kmer.high >>= 2;
+ kmerSeq[i] = ch;
+ }
+
+ for ( i = bit2 - 1; i >= 0; i-- )
+ {
+ ch = kmer.low & 0x3;
+ kmer.low >>= 2;
+ kmerSeq[i + bit1] = ch;
+ }
+
+ for ( i = 0; i < overlap; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) kmerSeq[i] ) );
+ }
}
#endif
-static void kmerSet_mark ( KmerSet * set )
+static void kmerSet_mark ( KmerSet *set )
{
- int i, in_num, out_num, cvgSingle;
- kmer_t * rs;
- long long counter = 0, linear = 0;
- Kmer word;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- in_num = out_num = 0;
- rs = set->array + set->iter_ptr;
- word = rs->seq;
-
- for ( i = 0; i < 4; i++ )
- {
- cvgSingle = get_kmer_left_cov ( *rs, i );
-
- if ( cvgSingle > 0 )
- {
- in_num++;
- }
-
- cvgSingle = get_kmer_right_cov ( *rs, i );
-
- if ( cvgSingle > 0 )
- {
- out_num++;
- }
- }
-
- if ( rs->single )
- {
- counter++;
- }
-
- if ( in_num == 1 && out_num == 1 )
- {
- rs->linear = 1;
- linear++;
- }
- }
-
- set->iter_ptr++;
- }
-
- //printf("Allocated %ld node, %ld single nodes, %ld linear\n",(long)count_kmerset(set),counter,linear);
+ int i, in_num, out_num, cvgSingle;
+ kmer_t *rs;
+ long long counter = 0, linear = 0;
+ Kmer word;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ in_num = out_num = 0;
+ rs = set->array + set->iter_ptr;
+ word = rs->seq;
+
+ for ( i = 0; i < 4; i++ )
+ {
+ cvgSingle = get_kmer_left_cov ( *rs, i );
+
+ if ( cvgSingle > 0 )
+ {
+ in_num++;
+ }
+
+ cvgSingle = get_kmer_right_cov ( *rs, i );
+
+ if ( cvgSingle > 0 )
+ {
+ out_num++;
+ }
+ }
+
+ if ( rs->single )
+ {
+ counter++;
+ }
+
+ if ( in_num == 1 && out_num == 1 )
+ {
+ rs->linear = 1;
+ linear++;
+ }
+ }
+
+ set->iter_ptr++;
+ }
+
+ //printf("Allocated %ld node, %ld single nodes, %ld linear\n",(long)count_kmerset(set),counter,linear);
}
-static kmer_t * searchNode ( Kmer word, KmerSet * kset, int overlap )
+static kmer_t *searchNode ( Kmer word, KmerSet *kset, int overlap )
{
- Kmer bal_word = reverseComplement ( word, overlap );
- kmer_t * node;
- boolean found;
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- found = search_kmerset ( kset, word, &node );
- }
- else
- {
- found = search_kmerset ( kset, bal_word, &node );
- }
-
- if ( found )
- {
- return node;
- }
- else
- {
- return NULL;
- }
+ Kmer bal_word = reverseComplement ( word, overlap );
+ kmer_t *node;
+ boolean found;
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset ( kset, word, &node );
+ }
+ else
+ {
+ found = search_kmerset ( kset, bal_word, &node );
+ }
+
+ if ( found )
+ {
+ return node;
+ }
+ else
+ {
+ return NULL;
+ }
}
-static int searchKmerOnCtg ( Kmer currW, Kmer * kmerDest, int num )
+static int searchKmerOnCtg ( Kmer currW, Kmer *kmerDest, int num )
{
- int i;
+ int i;
- for ( i = 0; i < num; i++ )
- {
- if ( KmerEqual ( currW, kmerDest[i] ) )
- {
- return i;
- }
- }
+ for ( i = 0; i < num; i++ )
+ {
+ if ( KmerEqual ( currW, kmerDest[i] ) )
+ {
+ return i;
+ }
+ }
- return -1;
+ return -1;
}
// pick on from n items randomly
-static int nPick1 ( int * array, int n )
+static int nPick1 ( int *array, int n )
{
- int m, i;
- m = n - 1; //(int)(drand48()*n);
- int value = array[m];
+ int m, i;
+ m = n - 1; //(int)(drand48()*n);
+ int value = array[m];
- for ( i = m; i < n - 1; i++ )
- {
- array[i] = array[i + 1];
- }
+ for ( i = m; i < n - 1; i++ )
+ {
+ array[i] = array[i + 1];
+ }
- return value;
+ return value;
}
-static void traceAlongDBgraph ( Kmer currW, int steps, int min, int max, int * num_route, KmerSet * kset,
- Kmer * kmerDest, int num, int overlap, Kmer WORDF, char ** foundRoutes, int * routeEndOnCtg2,
- int * routeLens, char * soFarSeq, int * traceCounter, int maxRoute, kmer_t ** soFarNode,
- boolean * multiOccu, long long * soFarLinks, double * avgLinks )
+static void traceAlongDBgraph ( Kmer currW, int steps, int min, int max, int *num_route, KmerSet *kset,
+ Kmer *kmerDest, int num, int overlap, Kmer WORDF, char **foundRoutes, int *routeEndOnCtg2,
+ int *routeLens, char *soFarSeq, int *traceCounter, int maxRoute, kmer_t **soFarNode,
+ boolean *multiOccu, long long *soFarLinks, double *avgLinks )
{
- ( *traceCounter ) ++;
-
- if ( *traceCounter > UPlimit )
- {
- /*
- if(overlap==19&&kmerDest[0]==pubKmer)
- printf("UPlimit\n");
- */
- return;
- }
-
- if ( steps > max || *num_route >= maxRoute )
- {
- /*
- if(overlap==19&&kmerDest[0]==pubKmer)
- printf("max steps/maxRoute\n");
- */
- return;
- }
-
- Kmer word = reverseComplement ( currW, overlap );
- boolean isSmaller = KmerSmaller ( currW, word );
- int i;
- char ch;
- unsigned char links;
-
- if ( isSmaller )
- {
- word = currW;
- }
-
- kmer_t * node;
- boolean found = search_kmerset ( kset, word, &node );
-
- // #ifdef DEBUG
- if ( !found )
- {
- fprintf ( stderr, "%s Trace: can't find kmer ", __FUNCTION__ );
- PrintKmer ( stderr, word );
- fprintf ( stderr, " (input " );
- PrintKmer ( stderr, currW );
- fprintf ( stderr, ") at step %d.\n", steps );
- /*
- #ifdef MER127
- fprintf (stderr, "%s Trace: can't find kmer %llx %llx %llx %llx (input %llx %llx %llx %llx) at step %d.\n",
- __FUNCTION__, word.high1, word.low1, word.high2, word.low2, currW.high1, currW.low1,
- currW.high2, currW.low2, steps );
- #else
- printf ( "Trace: can't find kmer %llx %llx (input %llx %llx) at step %d\n",
- word.high, word.low, currW.high, currW.low, steps );
- #endif
- */
- return;
- }
-
- // #else
- // if (!found) return;
- // #endif
-
- if ( node->twin > 1 )
- {
- return;
- }
-
- if ( soFarNode )
- {
- soFarNode[steps] = node;
- }
-
- if ( steps > 0 )
- {
- soFarSeq[steps - 1] = lastCharInKmer ( currW );
- }
-
- int index, end;
- int linkCounter = *soFarLinks;
-
- if ( steps >= min && node->inEdge > 1 && ( end = searchKmerOnCtg ( currW, kmerDest, num ) ) >= 0 )
- {
- index = *num_route;
-
- if ( steps > 0 )
- {
- avgLinks[index] = ( double ) linkCounter / steps;
- }
- else
- {
- avgLinks[index] = 0;
- }
-
- //find node that appears more than once in the path
- multiOccu[index] = 0;
-
- for ( i = 0; i < steps + 1; i++ )
- {
- soFarNode[i]->deleted = 0;
- }
-
- for ( i = 0; i < steps + 1; i++ )
- {
- if ( soFarNode[i]->deleted )
- {
- multiOccu[index] = 1;
- break;
- }
-
- soFarNode[i]->deleted = 1;
- }
-
- routeEndOnCtg2[index] = end;
- routeLens[index] = steps;
- char * array = foundRoutes[index];
-
- for ( i = 0; i < steps; i++ )
- {
- array[i] = soFarSeq[i];
- }
-
- if ( i < max )
- {
- array[i] = 4;
- } //indicate the end of the sequence
-
- *num_route = ++index;
- return;
- }
-
- steps++;
-
- if ( isSmaller )
- {
- int array[] = { 0, 1, 2, 3 };
-
- for ( i = 4; i > 0; i-- )
- {
- ch = nPick1 ( array, i );
- links = get_kmer_right_cov ( *node, ch );
-
- if ( !links )
- {
- continue;
- }
-
- *soFarLinks = linkCounter + links;
- word = nextKmerLocal ( currW, ch, WORDF );
- traceAlongDBgraph ( word, steps, min, max, num_route, kset, kmerDest, num, overlap, WORDF,
- foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, traceCounter, maxRoute,
- soFarNode, multiOccu, soFarLinks, avgLinks );
- }
- }
- else
- {
- int array[] = { 0, 1, 2, 3 };
-
- for ( i = 4; i > 0; i-- )
- {
- ch = nPick1 ( array, i );
- links = get_kmer_left_cov ( *node, ch );
-
- if ( !links )
- {
- continue;
- }
-
- *soFarLinks = linkCounter + links;
- word = nextKmerLocal ( currW, int_comp ( ch ), WORDF );
- traceAlongDBgraph ( word, steps, min, max, num_route, kset, kmerDest, num, overlap, WORDF,
- foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, traceCounter, maxRoute,
- soFarNode, multiOccu, soFarLinks, avgLinks );
- }
- }
+ ( *traceCounter ) ++;
+
+ if ( *traceCounter > UPlimit )
+ {
+ /*
+ if(overlap==19&&kmerDest[0]==pubKmer)
+ printf("UPlimit\n");
+ */
+ return;
+ }
+
+ if ( steps > max || *num_route >= maxRoute )
+ {
+ /*
+ if(overlap==19&&kmerDest[0]==pubKmer)
+ printf("max steps/maxRoute\n");
+ */
+ return;
+ }
+
+ Kmer word = reverseComplement ( currW, overlap );
+ boolean isSmaller = KmerSmaller ( currW, word );
+ int i;
+ char ch;
+ unsigned char links;
+
+ if ( isSmaller )
+ {
+ word = currW;
+ }
+
+ kmer_t *node;
+ boolean found = search_kmerset ( kset, word, &node );
+
+ // #ifdef DEBUG
+ if ( !found )
+ {
+ fprintf ( stderr, "%s Trace: can't find kmer ", __FUNCTION__ );
+ PrintKmer ( stderr, word );
+ fprintf ( stderr, " (input " );
+ PrintKmer ( stderr, currW );
+ fprintf ( stderr, ") at step %d.\n", steps );
+ /*
+ #ifdef MER127
+ fprintf (stderr, "%s Trace: can't find kmer %llx %llx %llx %llx (input %llx %llx %llx %llx) at step %d.\n",
+ __FUNCTION__, word.high1, word.low1, word.high2, word.low2, currW.high1, currW.low1,
+ currW.high2, currW.low2, steps );
+ #else
+ printf ( "Trace: can't find kmer %llx %llx (input %llx %llx) at step %d\n",
+ word.high, word.low, currW.high, currW.low, steps );
+ #endif
+ */
+ return;
+ }
+
+ // #else
+ // if (!found) return;
+ // #endif
+
+ if ( node->twin > 1 )
+ {
+ return;
+ }
+
+ if ( soFarNode )
+ {
+ soFarNode[steps] = node;
+ }
+
+ if ( steps > 0 )
+ {
+ soFarSeq[steps - 1] = lastCharInKmer ( currW );
+ }
+
+ int index, end;
+ int linkCounter = *soFarLinks;
+
+ if ( steps >= min && node->inEdge > 1 && ( end = searchKmerOnCtg ( currW, kmerDest, num ) ) >= 0 )
+ {
+ index = *num_route;
+
+ if ( steps > 0 )
+ {
+ avgLinks[index] = ( double ) linkCounter / steps;
+ }
+ else
+ {
+ avgLinks[index] = 0;
+ }
+
+ //find node that appears more than once in the path
+ multiOccu[index] = 0;
+
+ for ( i = 0; i < steps + 1; i++ )
+ {
+ soFarNode[i]->deleted = 0;
+ }
+
+ for ( i = 0; i < steps + 1; i++ )
+ {
+ if ( soFarNode[i]->deleted )
+ {
+ multiOccu[index] = 1;
+ break;
+ }
+
+ soFarNode[i]->deleted = 1;
+ }
+
+ routeEndOnCtg2[index] = end;
+ routeLens[index] = steps;
+ char *array = foundRoutes[index];
+
+ for ( i = 0; i < steps; i++ )
+ {
+ array[i] = soFarSeq[i];
+ }
+
+ if ( i < max )
+ {
+ array[i] = 4;
+ } //indicate the end of the sequence
+
+ *num_route = ++index;
+ return;
+ }
+
+ steps++;
+
+ if ( isSmaller )
+ {
+ int array[] = { 0, 1, 2, 3 };
+
+ for ( i = 4; i > 0; i-- )
+ {
+ ch = nPick1 ( array, i );
+ links = get_kmer_right_cov ( *node, ch );
+
+ if ( !links )
+ {
+ continue;
+ }
+
+ *soFarLinks = linkCounter + links;
+ word = nextKmerLocal ( currW, ch, WORDF );
+ traceAlongDBgraph ( word, steps, min, max, num_route, kset, kmerDest, num, overlap, WORDF,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, traceCounter, maxRoute,
+ soFarNode, multiOccu, soFarLinks, avgLinks );
+ }
+ }
+ else
+ {
+ int array[] = { 0, 1, 2, 3 };
+
+ for ( i = 4; i > 0; i-- )
+ {
+ ch = nPick1 ( array, i );
+ links = get_kmer_left_cov ( *node, ch );
+
+ if ( !links )
+ {
+ continue;
+ }
+
+ *soFarLinks = linkCounter + links;
+ word = nextKmerLocal ( currW, int_comp ( ch ), WORDF );
+ traceAlongDBgraph ( word, steps, min, max, num_route, kset, kmerDest, num, overlap, WORDF,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, traceCounter, maxRoute,
+ soFarNode, multiOccu, soFarLinks, avgLinks );
+ }
+ }
}
-static int searchFgap ( KmerSet * kset, CTGinSCAF * ctg1, CTGinSCAF * ctg2, Kmer * kmerCtg1, Kmer * kmerCtg2,
- unsigned int origOverlap, int overlap, DARRAY * gapSeqArray, int len1, int len2,
- Kmer WordFilter, int * offset1, int * offset2, char * seqGap, int * cut1, int * cut2 )
+static int searchFgap ( KmerSet *kset, CTGinSCAF *ctg1, CTGinSCAF *ctg2, Kmer *kmerCtg1, Kmer *kmerCtg2,
+ unsigned int origOverlap, int overlap, DARRAY *gapSeqArray, int len1, int len2,
+ Kmer WordFilter, int *offset1, int *offset2, char *seqGap, int *cut1, int *cut2 )
{
- int i;
- int ret = 0;
- kmer_t * node, **soFarNode;
- int num_route;
- int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
- int min = gapLen - GLDiff > 0 ? gapLen - GLDiff : 0; //0531
- int max = gapLen + GLDiff < 10 ? 10 : gapLen + GLDiff;
- char ** foundRoutes;
- char * soFarSeq;
- int traceCounter;
- int * routeEndOnCtg2;
- int * routeLens;
- boolean * multiOccu;
- long long soFarLinks;
- double * avgLinks;
- //mask linear internal linear kmer on contig1 end
- routeEndOnCtg2 = ( int * ) ckalloc ( MaxRouteNum * sizeof ( int ) );
- routeLens = ( int * ) ckalloc ( MaxRouteNum * sizeof ( int ) );
- multiOccu = ( boolean * ) ckalloc ( MaxRouteNum * sizeof ( boolean ) );
- short * MULTI1 = ( short * ) ckalloc ( MaxRouteNum * sizeof ( short ) );
- short * MULTI2 = ( short * ) ckalloc ( MaxRouteNum * sizeof ( short ) );
- soFarSeq = ( char * ) ckalloc ( max * sizeof ( char ) );
- soFarNode = ( kmer_t ** ) ckalloc ( ( max + 1 ) * sizeof ( kmer_t * ) );
- foundRoutes = ( char ** ) ckalloc ( MaxRouteNum * sizeof ( char * ) );;
- avgLinks = ( double * ) ckalloc ( MaxRouteNum * sizeof ( double ) );;
-
- for ( i = 0; i < MaxRouteNum; i++ )
- {
- foundRoutes[i] = ( char * ) ckalloc ( max * sizeof ( char ) );
- }
-
- for ( i = len1 - 1; i >= 0; i-- )
- {
- num_route = traceCounter = soFarLinks = 0;
- int steps = 0;
- traceAlongDBgraph ( kmerCtg1[i], steps, min, max, &num_route, kset, kmerCtg2, len2, overlap, WordFilter,
- foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, &traceCounter, MaxRouteNum, soFarNode,
- multiOccu, &soFarLinks, avgLinks );
-
- if ( num_route > 0 )
- {
- int m, minEnd = routeEndOnCtg2[0];
-
- for ( m = 0; m < num_route; m++ )
- {
- if ( routeLens[m] < 0 )
- {
- continue;
- }
-
- if ( routeEndOnCtg2[m] < minEnd )
- {
- minEnd = routeEndOnCtg2[m];
- }
- }
-
- /* else if(minFreq>1){
- for(m=0;m<num_route;m++){
- if(routeEndOnCtg2[m]!=minEnd)
- continue;
- for(j=0;j<max;j++){
- if(foundRoutes[m][j]>3)
- break;
- printf("%c",int2base((int)foundRoutes[m][j]));
- }
- printf(": %4.2f\n",avgLinks[m]);
- }
- } */
- num_route = traceCounter = soFarLinks = 0;
- steps = 0;
- trace4Repeat ( kmerCtg1[i], steps, min, max, &num_route, kset, kmerCtg2[minEnd], overlap, WordFilter, &traceCounter,
- MaxRouteNum, soFarNode, MULTI1, MULTI2, routeLens, foundRoutes, soFarSeq, &soFarLinks, avgLinks );
- int j, best = 0;
- int maxLen = routeLens[0];
- double maxLink = avgLinks[0];
- char * pt;
- boolean repeat = 0, sameLen = 1;
- int leftMost = max, rightMost = max;
+ int i;
+ int ret = 0;
+ kmer_t *node, **soFarNode;
+ int num_route;
+ int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
+ int min = gapLen - GLDiff > 0 ? gapLen - GLDiff : 0; //0531
+ int max = gapLen + GLDiff < 10 ? 10 : gapLen + GLDiff;
+ char **foundRoutes;
+ char *soFarSeq;
+ int traceCounter;
+ int *routeEndOnCtg2;
+ int *routeLens;
+ boolean *multiOccu;
+ long long soFarLinks;
+ double *avgLinks;
+ //mask linear internal linear kmer on contig1 end
+ routeEndOnCtg2 = ( int * ) ckalloc ( MaxRouteNum * sizeof ( int ) );
+ routeLens = ( int * ) ckalloc ( MaxRouteNum * sizeof ( int ) );
+ multiOccu = ( boolean * ) ckalloc ( MaxRouteNum * sizeof ( boolean ) );
+ short *MULTI1 = ( short * ) ckalloc ( MaxRouteNum * sizeof ( short ) );
+ short *MULTI2 = ( short * ) ckalloc ( MaxRouteNum * sizeof ( short ) );
+ soFarSeq = ( char * ) ckalloc ( max * sizeof ( char ) );
+ soFarNode = ( kmer_t ** ) ckalloc ( ( max + 1 ) * sizeof ( kmer_t * ) );
+ foundRoutes = ( char ** ) ckalloc ( MaxRouteNum * sizeof ( char * ) );;
+ avgLinks = ( double * ) ckalloc ( MaxRouteNum * sizeof ( double ) );;
+
+ for ( i = 0; i < MaxRouteNum; i++ )
+ {
+ foundRoutes[i] = ( char * ) ckalloc ( max * sizeof ( char ) );
+ }
+
+ for ( i = len1 - 1; i >= 0; i-- )
+ {
+ num_route = traceCounter = soFarLinks = 0;
+ int steps = 0;
+ traceAlongDBgraph ( kmerCtg1[i], steps, min, max, &num_route, kset, kmerCtg2, len2, overlap, WordFilter,
+ foundRoutes, routeEndOnCtg2, routeLens, soFarSeq, &traceCounter, MaxRouteNum, soFarNode,
+ multiOccu, &soFarLinks, avgLinks );
+
+ if ( num_route > 0 )
+ {
+ int m, minEnd = routeEndOnCtg2[0];
+
+ for ( m = 0; m < num_route; m++ )
+ {
+ if ( routeLens[m] < 0 )
+ {
+ continue;
+ }
+
+ if ( routeEndOnCtg2[m] < minEnd )
+ {
+ minEnd = routeEndOnCtg2[m];
+ }
+ }
+
+ /* else if(minFreq>1){
+ for(m=0;m<num_route;m++){
+ if(routeEndOnCtg2[m]!=minEnd)
+ continue;
+ for(j=0;j<max;j++){
+ if(foundRoutes[m][j]>3)
+ break;
+ printf("%c",int2base((int)foundRoutes[m][j]));
+ }
+ printf(": %4.2f\n",avgLinks[m]);
+ }
+ } */
+ num_route = traceCounter = soFarLinks = 0;
+ steps = 0;
+ trace4Repeat ( kmerCtg1[i], steps, min, max, &num_route, kset, kmerCtg2[minEnd], overlap, WordFilter, &traceCounter,
+ MaxRouteNum, soFarNode, MULTI1, MULTI2, routeLens, foundRoutes, soFarSeq, &soFarLinks, avgLinks );
+ int j, best = 0;
+ int maxLen = routeLens[0];
+ double maxLink = avgLinks[0];
+ char *pt;
+ boolean repeat = 0, sameLen = 1;
+ int leftMost = max, rightMost = max;
#ifdef DEBUG
- if ( num_route < 1 )
- {
- fprintf ( stderr, "After trace4Repeat: non route was found.\n" );
- continue;
- }
+ if ( num_route < 1 )
+ {
+ fprintf ( stderr, "After trace4Repeat: non route was found.\n" );
+ continue;
+ }
#else
- if ( num_route < 1 ) { continue; }
+ if ( num_route < 1 )
+ {
+ continue;
+ }
#endif
- if ( num_route > 1 )
- {
- // if multi paths are found, we check on the repeatative occurrences and links/length
- for ( m = 0; m < num_route; m++ )
- {
- if ( routeLens[m] < 0 )
- {
- continue;
- }
-
- if ( MULTI1[m] >= 0 && MULTI2[m] >= 0 )
- {
- repeat = 1;
- leftMost = leftMost > MULTI1[m] ? MULTI1[m] : leftMost;
- rightMost = rightMost > MULTI2[m] ? MULTI2[m] : rightMost;
- }
-
- if ( routeLens[m] != maxLen )
- {
- sameLen = 0;
- }
-
- if ( routeLens[m] < maxLen )
- {
- maxLen = routeLens[m];
- }
-
- if ( avgLinks[m] > maxLink )
- {
- maxLink = avgLinks[m];
- best = m;
- }
- }
- }
-
- if ( repeat )
- {
- *offset1 = *offset2 = *cut1 = *cut2 = 0;
- int index = 0;
- char ch;
-
- for ( j = 0; j < leftMost; j++ )
- {
- if ( routeLens[0] < j + overlap + 1 )
- {
- break;
- }
- else
- {
- ch = foundRoutes[0][j];
- }
-
- for ( m = 1; m < num_route; m++ )
- {
- if ( routeLens[m] < 0 )
- {
- continue;
- }
-
- if ( ch != foundRoutes[m][j] )
- {
- break;
- }
- }
-
- if ( m == num_route )
- {
- seqGap[index++] = ch;
- }
- else
- {
- break;
- }
- }
-
- *offset1 = index;
- index = 0;
-
- for ( j = 0; j < rightMost; j++ )
- {
- if ( routeLens[0] - overlap - 1 < j )
- {
- break;
- }
- else
- {
- ch = foundRoutes[0][routeLens[0] - overlap - 1 - j];
- }
-
- for ( m = 1; m < num_route; m++ )
- {
- if ( routeLens[m] < 0 )
- {
- continue;
- }
-
- if ( ch != foundRoutes[m][routeLens[m] - overlap - 1 - j] )
- {
- break;
- }
- }
-
- if ( m == num_route )
- {
- index++;
- }
- else
- {
- break;
- }
- }
-
- *offset2 = index;
-
- for ( j = 0; j < *offset2; j++ )
- {
- seqGap[*offset1 + *offset2 - 1 - j] = foundRoutes[0][routeLens[0] - overlap - 1 - j];
- }
-
- if ( *offset1 > 0 || *offset2 > 0 )
- {
- *cut1 = len1 - i - 1;
- *cut2 = minEnd;
-
- //fprintf(stderr,"\n");
- for ( m = 0; m < num_route; m++ )
- {
- for ( j = 0; j < max; j++ )
- {
- if ( foundRoutes[m][j] > 3 )
- {
- break;
- }
-
- //fprintf(stderr,"%c",int2base((int)foundRoutes[m][j]));
- }
-
- //fprintf(stderr,": %4.2f\n",avgLinks[m]);
- }
-
- /*
- fprintf(stderr,">Gap (%d + %d) (%d + %d)\n",*offset1,*offset2,*cut1,*cut2);
- for(index=0;index<*offset1+*offset2;index++)
- fprintf(stderr,"%c",int2base(seqGap[index]));
- fprintf(stderr,"\n"); */
- }
-
- ret = 3;
- break;
- }
-
- if ( overlap + ( len1 - i - 1 ) + minEnd - routeLens[best] > ( int ) origOverlap )
- {
- continue;
- }
-
- ctg1->gapSeqOffset = gapSeqArray->item_c;
- ctg1->gapSeqLen = routeLens[best];
-
- if ( !darrayPut ( gapSeqArray, ctg1->gapSeqOffset + maxLen / 4 ) )
- {
- continue;
- }
-
- pt = ( char * ) darrayPut ( gapSeqArray, ctg1->gapSeqOffset );
-
- /*
- printKmerSeqLocal(stderr,kmerCtg1[i],overlap);
- fprintf(stderr,"-");
- */
- for ( j = 0; j < max; j++ )
- {
- if ( foundRoutes[best][j] > 3 )
- {
- break;
- }
-
- writeChar2tightString ( foundRoutes[best][j], pt, j );
- //fprintf(stderr,"%c",int2base((int)foundRoutes[best][j]));
- }
-
- //fprintf(stderr,": GAPSEQ %d + %d, avglink %4.2f\n",len1-i-1,minEnd,avgLinks[best]);
- ctg1->cutTail = len1 - i - 1;
- ctg2->cutHead = overlap + minEnd;
- ctg2->scaftig_start = 0;
- ret = 1;
- break;
- /* }if(num_route>1){
- ret = 2;
- break; */
- }
- else //mark node which leads to dead end
- {
- node = searchNode ( kmerCtg1[i], kset, overlap );
-
- if ( node )
- {
- node->twin = 2;
- }
- }
- }
-
- for ( i = 0; i < MaxRouteNum; i++ )
- {
- free ( ( void * ) foundRoutes[i] );
- }
-
- free ( ( void * ) soFarSeq );
- free ( ( void * ) soFarNode );
- free ( ( void * ) multiOccu );
- free ( ( void * ) MULTI1 );
- free ( ( void * ) MULTI2 );
- free ( ( void * ) foundRoutes );
- free ( ( void * ) routeEndOnCtg2 );
- free ( ( void * ) routeLens );
- return ret;
+ if ( num_route > 1 )
+ {
+ // if multi paths are found, we check on the repeatative occurrences and links/length
+ for ( m = 0; m < num_route; m++ )
+ {
+ if ( routeLens[m] < 0 )
+ {
+ continue;
+ }
+
+ if ( MULTI1[m] >= 0 && MULTI2[m] >= 0 )
+ {
+ repeat = 1;
+ leftMost = leftMost > MULTI1[m] ? MULTI1[m] : leftMost;
+ rightMost = rightMost > MULTI2[m] ? MULTI2[m] : rightMost;
+ }
+
+ if ( routeLens[m] != maxLen )
+ {
+ sameLen = 0;
+ }
+
+ if ( routeLens[m] < maxLen )
+ {
+ maxLen = routeLens[m];
+ }
+
+ if ( avgLinks[m] > maxLink )
+ {
+ maxLink = avgLinks[m];
+ best = m;
+ }
+ }
+ }
+
+ if ( repeat )
+ {
+ *offset1 = *offset2 = *cut1 = *cut2 = 0;
+ int index = 0;
+ char ch;
+
+ for ( j = 0; j < leftMost; j++ )
+ {
+ if ( routeLens[0] < j + overlap + 1 )
+ {
+ break;
+ }
+ else
+ {
+ ch = foundRoutes[0][j];
+ }
+
+ for ( m = 1; m < num_route; m++ )
+ {
+ if ( routeLens[m] < 0 )
+ {
+ continue;
+ }
+
+ if ( ch != foundRoutes[m][j] )
+ {
+ break;
+ }
+ }
+
+ if ( m == num_route )
+ {
+ seqGap[index++] = ch;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ *offset1 = index;
+ index = 0;
+
+ for ( j = 0; j < rightMost; j++ )
+ {
+ if ( routeLens[0] - overlap - 1 < j )
+ {
+ break;
+ }
+ else
+ {
+ ch = foundRoutes[0][routeLens[0] - overlap - 1 - j];
+ }
+
+ for ( m = 1; m < num_route; m++ )
+ {
+ if ( routeLens[m] < 0 )
+ {
+ continue;
+ }
+
+ if ( ch != foundRoutes[m][routeLens[m] - overlap - 1 - j] )
+ {
+ break;
+ }
+ }
+
+ if ( m == num_route )
+ {
+ index++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ *offset2 = index;
+
+ for ( j = 0; j < *offset2; j++ )
+ {
+ seqGap[*offset1 + *offset2 - 1 - j] = foundRoutes[0][routeLens[0] - overlap - 1 - j];
+ }
+
+ if ( *offset1 > 0 || *offset2 > 0 )
+ {
+ *cut1 = len1 - i - 1;
+ *cut2 = minEnd;
+
+ //fprintf(stderr,"\n");
+ for ( m = 0; m < num_route; m++ )
+ {
+ for ( j = 0; j < max; j++ )
+ {
+ if ( foundRoutes[m][j] > 3 )
+ {
+ break;
+ }
+
+ //fprintf(stderr,"%c",int2base((int)foundRoutes[m][j]));
+ }
+
+ //fprintf(stderr,": %4.2f\n",avgLinks[m]);
+ }
+
+ /*
+ fprintf(stderr,">Gap (%d + %d) (%d + %d)\n",*offset1,*offset2,*cut1,*cut2);
+ for(index=0;index<*offset1+*offset2;index++)
+ fprintf(stderr,"%c",int2base(seqGap[index]));
+ fprintf(stderr,"\n"); */
+ }
+
+ ret = 3;
+ break;
+ }
+
+ if ( overlap + ( len1 - i - 1 ) + minEnd - routeLens[best] > ( int ) origOverlap )
+ {
+ continue;
+ }
+
+ ctg1->gapSeqOffset = gapSeqArray->item_c;
+ ctg1->gapSeqLen = routeLens[best];
+
+ if ( !darrayPut ( gapSeqArray, ctg1->gapSeqOffset + maxLen / 4 ) )
+ {
+ continue;
+ }
+
+ pt = ( char * ) darrayPut ( gapSeqArray, ctg1->gapSeqOffset );
+
+ /*
+ printKmerSeqLocal(stderr,kmerCtg1[i],overlap);
+ fprintf(stderr,"-");
+ */
+ for ( j = 0; j < max; j++ )
+ {
+ if ( foundRoutes[best][j] > 3 )
+ {
+ break;
+ }
+
+ writeChar2tightString ( foundRoutes[best][j], pt, j );
+ //fprintf(stderr,"%c",int2base((int)foundRoutes[best][j]));
+ }
+
+ //fprintf(stderr,": GAPSEQ %d + %d, avglink %4.2f\n",len1-i-1,minEnd,avgLinks[best]);
+ ctg1->cutTail = len1 - i - 1;
+ ctg2->cutHead = overlap + minEnd;
+ ctg2->scaftig_start = 0;
+ ret = 1;
+ break;
+ /* }if(num_route>1){
+ ret = 2;
+ break; */
+ }
+ else //mark node which leads to dead end
+ {
+ node = searchNode ( kmerCtg1[i], kset, overlap );
+
+ if ( node )
+ {
+ node->twin = 2;
+ }
+ }
+ }
+
+ for ( i = 0; i < MaxRouteNum; i++ )
+ {
+ free ( ( void * ) foundRoutes[i] );
+ }
+
+ free ( ( void * ) soFarSeq );
+ free ( ( void * ) soFarNode );
+ free ( ( void * ) multiOccu );
+ free ( ( void * ) MULTI1 );
+ free ( ( void * ) MULTI2 );
+ free ( ( void * ) foundRoutes );
+ free ( ( void * ) routeEndOnCtg2 );
+ free ( ( void * ) routeLens );
+ return ret;
}
-static void trace4Repeat ( Kmer currW, int steps, int min, int max, int * num_route, KmerSet * kset, Kmer kmerDest, int overlap, Kmer WORDF,
- int * traceCounter, int maxRoute, kmer_t ** soFarNode, short * multiOccu1, short * multiOccu2, int * routeLens,
- char ** foundRoutes, char * soFarSeq, long long * soFarLinks, double * avgLinks )
+static void trace4Repeat ( Kmer currW, int steps, int min, int max, int *num_route, KmerSet *kset, Kmer kmerDest, int overlap, Kmer WORDF,
+ int *traceCounter, int maxRoute, kmer_t **soFarNode, short *multiOccu1, short *multiOccu2, int *routeLens,
+ char **foundRoutes, char *soFarSeq, long long *soFarLinks, double *avgLinks )
{
- ( *traceCounter ) ++;
-
- if ( *traceCounter > UPlimit )
- {
- return;
- }
-
- if ( steps > max || *num_route >= maxRoute )
- {
- return;
- }
-
- Kmer word = reverseComplement ( currW, overlap );
- boolean isSmaller = KmerSmaller ( currW, word );
- char ch;
- unsigned char links;
- int index, i;
-
- if ( isSmaller )
- {
- word = currW;
- }
-
- kmer_t * node;
- boolean found = search_kmerset ( kset, word, &node );
-
- // #ifdef DEBUG
- if ( !found )
- {
- fprintf ( stderr, "%s Trace: can't find kmer ", __FUNCTION__ );
- PrintKmer ( stderr, word );
- fprintf ( stderr, " (input " );
- PrintKmer ( stderr, currW );
- fprintf ( stderr, ") at step %d.\n", steps );
- /*
- #ifdef MER127
- printf ( "%s Trace: can't find kmer %llx %llx %llx %llx (input %llx %llx %llx %llx) at step %d\n",
- __FUNCTION__, word.high1, word.low1, word.high2, word.low2, currW.high1, currW.low1,
- currW.high2, currW.low2, steps );
- #else
- printf ( "Trace: can't find kmer %llx %llx (input %llx %llx) at step %d\n",
- word.high, word.low, currW.high, currW.low, steps );
- #endif
- */
- return;
- }
-
- // #else
- // if (!found) return;
- // #endif
-
- if ( soFarNode )
- {
- soFarNode[steps] = node;
- }
-
- if ( soFarSeq && steps > 0 )
- {
- soFarSeq[steps - 1] = lastCharInKmer ( currW );
- }
-
- int linkCounter;
-
- if ( soFarLinks )
- {
- linkCounter = *soFarLinks;
- }
-
- if ( steps >= min && KmerEqual ( currW, kmerDest ) )
- {
- index = *num_route;
-
- if ( avgLinks && steps > 0 )
- {
- avgLinks[index] = ( double ) linkCounter / steps;
- }
- else if ( avgLinks )
- {
- avgLinks[index] = 0;
- }
-
- //find node that appears more than once in the path
- if ( multiOccu1 && multiOccu2 )
- {
- for ( i = 0; i < steps + 1; i++ )
- {
- soFarNode[i]->deleted = 0;
- }
-
- int rightMost = 0;
- boolean MULTI = 0;
-
- for ( i = 0; i < steps + 1; i++ )
- {
- if ( soFarNode[i]->deleted )
- {
- rightMost = rightMost < i - 1 ? i - 1 : rightMost;
- MULTI = 1;
- }
-
- soFarNode[i]->deleted = 1;
- }
-
- if ( !MULTI )
- {
- multiOccu1[index] = multiOccu2[index] = -1;
- }
- else
- {
- multiOccu2[index] = steps - 2 - rightMost < 0 ? 0 : steps - 2 - rightMost; //[0 steps-2]
-
- for ( i = 0; i < steps + 1; i++ )
- {
- soFarNode[i]->deleted = 0;
- }
-
- int leftMost = steps - 2;
-
- for ( i = steps; i >= 0; i-- )
- {
- if ( soFarNode[i]->deleted )
- {
- leftMost = leftMost > i - 1 ? i - 1 : leftMost;
- }
-
- soFarNode[i]->deleted = 1;
- }
-
- multiOccu1[index] = leftMost < 0 ? 0 : leftMost; //[0 steps-2]
- }
- }
-
- if ( routeLens )
- {
- routeLens[index] = steps;
- }
-
- if ( soFarSeq )
- {
- char * array = foundRoutes[index];
-
- for ( i = 0; i < steps; i++ )
- {
- array[i] = soFarSeq[i];
- }
-
- if ( i < max )
- {
- array[i] = 4;
- } //indicate the end of the sequence
- }
-
- *num_route = ++index;
- }
-
- steps++;
-
- if ( isSmaller )
- {
- int array[] = { 0, 1, 2, 3 };
-
- for ( i = 4; i > 0; i-- )
- {
- ch = nPick1 ( array, i );
- links = get_kmer_right_cov ( *node, ch );
-
- if ( !links )
- {
- continue;
- }
-
- if ( soFarLinks )
- {
- *soFarLinks = linkCounter + links;
- }
-
- word = nextKmerLocal ( currW, ch, WORDF );
- trace4Repeat ( word, steps, min, max, num_route, kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
- multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq, soFarLinks, avgLinks );
- }
- }
- else
- {
- int array[] = { 0, 1, 2, 3 };
-
- for ( i = 4; i > 0; i-- )
- {
- ch = nPick1 ( array, i );
- links = get_kmer_left_cov ( *node, ch );
-
- if ( !links )
- {
- continue;
- }
-
- if ( soFarLinks )
- {
- *soFarLinks = linkCounter + links;
- }
-
- word = nextKmerLocal ( currW, int_comp ( ch ), WORDF );
- trace4Repeat ( word, steps, min, max, num_route, kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
- multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq, soFarLinks, avgLinks );
- }
- }
+ ( *traceCounter ) ++;
+
+ if ( *traceCounter > UPlimit )
+ {
+ return;
+ }
+
+ if ( steps > max || *num_route >= maxRoute )
+ {
+ return;
+ }
+
+ Kmer word = reverseComplement ( currW, overlap );
+ boolean isSmaller = KmerSmaller ( currW, word );
+ char ch;
+ unsigned char links;
+ int index, i;
+
+ if ( isSmaller )
+ {
+ word = currW;
+ }
+
+ kmer_t *node;
+ boolean found = search_kmerset ( kset, word, &node );
+
+ // #ifdef DEBUG
+ if ( !found )
+ {
+ fprintf ( stderr, "%s Trace: can't find kmer ", __FUNCTION__ );
+ PrintKmer ( stderr, word );
+ fprintf ( stderr, " (input " );
+ PrintKmer ( stderr, currW );
+ fprintf ( stderr, ") at step %d.\n", steps );
+ /*
+ #ifdef MER127
+ printf ( "%s Trace: can't find kmer %llx %llx %llx %llx (input %llx %llx %llx %llx) at step %d\n",
+ __FUNCTION__, word.high1, word.low1, word.high2, word.low2, currW.high1, currW.low1,
+ currW.high2, currW.low2, steps );
+ #else
+ printf ( "Trace: can't find kmer %llx %llx (input %llx %llx) at step %d\n",
+ word.high, word.low, currW.high, currW.low, steps );
+ #endif
+ */
+ return;
+ }
+
+ // #else
+ // if (!found) return;
+ // #endif
+
+ if ( soFarNode )
+ {
+ soFarNode[steps] = node;
+ }
+
+ if ( soFarSeq && steps > 0 )
+ {
+ soFarSeq[steps - 1] = lastCharInKmer ( currW );
+ }
+
+ int linkCounter;
+
+ if ( soFarLinks )
+ {
+ linkCounter = *soFarLinks;
+ }
+
+ if ( steps >= min && KmerEqual ( currW, kmerDest ) )
+ {
+ index = *num_route;
+
+ if ( avgLinks && steps > 0 )
+ {
+ avgLinks[index] = ( double ) linkCounter / steps;
+ }
+ else if ( avgLinks )
+ {
+ avgLinks[index] = 0;
+ }
+
+ //find node that appears more than once in the path
+ if ( multiOccu1 && multiOccu2 )
+ {
+ for ( i = 0; i < steps + 1; i++ )
+ {
+ soFarNode[i]->deleted = 0;
+ }
+
+ int rightMost = 0;
+ boolean MULTI = 0;
+
+ for ( i = 0; i < steps + 1; i++ )
+ {
+ if ( soFarNode[i]->deleted )
+ {
+ rightMost = rightMost < i - 1 ? i - 1 : rightMost;
+ MULTI = 1;
+ }
+
+ soFarNode[i]->deleted = 1;
+ }
+
+ if ( !MULTI )
+ {
+ multiOccu1[index] = multiOccu2[index] = -1;
+ }
+ else
+ {
+ multiOccu2[index] = steps - 2 - rightMost < 0 ? 0 : steps - 2 - rightMost; //[0 steps-2]
+
+ for ( i = 0; i < steps + 1; i++ )
+ {
+ soFarNode[i]->deleted = 0;
+ }
+
+ int leftMost = steps - 2;
+
+ for ( i = steps; i >= 0; i-- )
+ {
+ if ( soFarNode[i]->deleted )
+ {
+ leftMost = leftMost > i - 1 ? i - 1 : leftMost;
+ }
+
+ soFarNode[i]->deleted = 1;
+ }
+
+ multiOccu1[index] = leftMost < 0 ? 0 : leftMost; //[0 steps-2]
+ }
+ }
+
+ if ( routeLens )
+ {
+ routeLens[index] = steps;
+ }
+
+ if ( soFarSeq )
+ {
+ char *array = foundRoutes[index];
+
+ for ( i = 0; i < steps; i++ )
+ {
+ array[i] = soFarSeq[i];
+ }
+
+ if ( i < max )
+ {
+ array[i] = 4;
+ } //indicate the end of the sequence
+ }
+
+ *num_route = ++index;
+ }
+
+ steps++;
+
+ if ( isSmaller )
+ {
+ int array[] = { 0, 1, 2, 3 };
+
+ for ( i = 4; i > 0; i-- )
+ {
+ ch = nPick1 ( array, i );
+ links = get_kmer_right_cov ( *node, ch );
+
+ if ( !links )
+ {
+ continue;
+ }
+
+ if ( soFarLinks )
+ {
+ *soFarLinks = linkCounter + links;
+ }
+
+ word = nextKmerLocal ( currW, ch, WORDF );
+ trace4Repeat ( word, steps, min, max, num_route, kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
+ multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq, soFarLinks, avgLinks );
+ }
+ }
+ else
+ {
+ int array[] = { 0, 1, 2, 3 };
+
+ for ( i = 4; i > 0; i-- )
+ {
+ ch = nPick1 ( array, i );
+ links = get_kmer_left_cov ( *node, ch );
+
+ if ( !links )
+ {
+ continue;
+ }
+
+ if ( soFarLinks )
+ {
+ *soFarLinks = linkCounter + links;
+ }
+
+ word = nextKmerLocal ( currW, int_comp ( ch ), WORDF );
+ trace4Repeat ( word, steps, min, max, num_route, kset, kmerDest, overlap, WORDF, traceCounter, maxRoute, soFarNode,
+ multiOccu1, multiOccu2, routeLens, foundRoutes, soFarSeq, soFarLinks, avgLinks );
+ }
+ }
}
//found repeat node on contig ends
-static void maskRepeatNode ( KmerSet * kset, Kmer * kmerCtg1, Kmer * kmerCtg2, int overlap, int len1, int len2, int max, Kmer WordFilter )
+static void maskRepeatNode ( KmerSet *kset, Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap, int len1, int len2, int max, Kmer WordFilter )
{
- int i;
- int num_route, steps;
- int min = 1, maxRoute = 1;
- int traceCounter;
- Kmer word, bal_word;
- kmer_t * node;
- boolean found;
- int counter = 0;
-
- for ( i = 0; i < len1; i++ )
- {
- word = kmerCtg1[i];
- bal_word = reverseComplement ( word, overlap );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- word = bal_word;
- }
-
- found = search_kmerset ( kset, word, &node );
-
- if ( !found || node->linear )
- {
- //printf("Found no node for kmer %llx\n",word);
- continue;
- }
-
- num_route = traceCounter = 0;
- steps = 0;
- trace4Repeat ( word, steps, min, max, &num_route, kset, word, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
-
- if ( num_route < 1 )
- {
- continue;
- }
-
- counter++;
- node->checked = 1;
- }
-
- for ( i = 0; i < len2; i++ )
- {
- word = kmerCtg2[i];
- bal_word = reverseComplement ( word, overlap );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- word = bal_word;
- }
-
- found = search_kmerset ( kset, word, &node );
-
- if ( !found || node->linear )
- {
- //printf("Found no node for kmer %llx\n",word);
- continue;
- }
-
- num_route = traceCounter = 0;
- steps = 0;
- trace4Repeat ( word, steps, min, max, &num_route, kset, word, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
-
- if ( num_route < 1 )
- {
- continue;
- }
-
- counter++;
- node->checked = 1;
- }
-
- //printf("MR: %d(%d)\n",counter,len1+len2);
+ int i;
+ int num_route, steps;
+ int min = 1, maxRoute = 1;
+ int traceCounter;
+ Kmer word, bal_word;
+ kmer_t *node;
+ boolean found;
+ int counter = 0;
+
+ for ( i = 0; i < len1; i++ )
+ {
+ word = kmerCtg1[i];
+ bal_word = reverseComplement ( word, overlap );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ word = bal_word;
+ }
+
+ found = search_kmerset ( kset, word, &node );
+
+ if ( !found || node->linear )
+ {
+ //printf("Found no node for kmer %llx\n",word);
+ continue;
+ }
+
+ num_route = traceCounter = 0;
+ steps = 0;
+ trace4Repeat ( word, steps, min, max, &num_route, kset, word, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
+
+ if ( num_route < 1 )
+ {
+ continue;
+ }
+
+ counter++;
+ node->checked = 1;
+ }
+
+ for ( i = 0; i < len2; i++ )
+ {
+ word = kmerCtg2[i];
+ bal_word = reverseComplement ( word, overlap );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ word = bal_word;
+ }
+
+ found = search_kmerset ( kset, word, &node );
+
+ if ( !found || node->linear )
+ {
+ //printf("Found no node for kmer %llx\n",word);
+ continue;
+ }
+
+ num_route = traceCounter = 0;
+ steps = 0;
+ trace4Repeat ( word, steps, min, max, &num_route, kset, word, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
+
+ if ( num_route < 1 )
+ {
+ continue;
+ }
+
+ counter++;
+ node->checked = 1;
+ }
+
+ //printf("MR: %d(%d)\n",counter,len1+len2);
}
/*
@@ -1563,137 +1566,137 @@ static boolean readsCrossGap(READNEARBY *rdArray, int num, int originOverlap,DAR
return ret;
}
*/
-static void kmerSet_markTandem ( KmerSet * set, Kmer WordFilter, int overlap );
-static boolean readsCrossGap ( READNEARBY * rdArray, int num, int originOverlap, DARRAY * gapSeqArray,
- Kmer * kmerCtg1, Kmer * kmerCtg2, int overlap, CTGinSCAF * ctg1, CTGinSCAF * ctg2,
- KmerSet * kmerS, Kmer WordFilter, int min, int max, int offset1, int offset2,
- char * seqGap, char * seqCtg1, char * seqCtg2, int cut1, int cut2 );
-
-int localGraph ( READNEARBY * rdArray, int num, CTGinSCAF * ctg1, CTGinSCAF * ctg2,
- int origOverlap, Kmer * kmerCtg1, Kmer * kmerCtg2, int overlap,
- DARRAY * gapSeqArray, char * seqCtg1, char * seqCtg2, char * seqGap )
+static void kmerSet_markTandem ( KmerSet *set, Kmer WordFilter, int overlap );
+static boolean readsCrossGap ( READNEARBY *rdArray, int num, int originOverlap, DARRAY *gapSeqArray,
+ Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+ KmerSet *kmerS, Kmer WordFilter, int min, int max, int offset1, int offset2,
+ char *seqGap, char *seqCtg1, char *seqCtg2, int cut1, int cut2 );
+
+int localGraph ( READNEARBY *rdArray, int num, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+ int origOverlap, Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap,
+ DARRAY *gapSeqArray, char *seqCtg1, char *seqCtg2, char *seqGap )
{
- /**************** put kmer in DBgraph ****************/
- KmerSet * kmerSet;
- Kmer WordFilter = createFilter ( overlap );
- /*
- if(ctg1->ctgID==56410&&ctg2->ctgID==61741)
- printf("Extract %d reads for gap [%d %d]\n",num,ctg1->ctgID,ctg2->ctgID);
- */
- kmerSet = readsInGap2DBgraph ( rdArray, num, ctg1, ctg2, origOverlap, kmerCtg1, kmerCtg2, overlap, WordFilter );
-
- if ( !kmerSet )
- {
- //printf("no kmer found\n");
- return 0;
- }
-
- time_t tt;
- time ( &tt );
- //srand48 ( ( int ) tt );
- /*
- int i,j;
- for(i=0;i<2;i++){
- int array[] = {0,1,2,3};
- for(j=4;j>0;j--)
- fprintf(stderr,"%d ", nPick1(array,j));
- }
- fprintf(stderr,"\n");
- */
- /***************** search path to connect contig ends ********/
- int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
- int min = gapLen - GLDiff > 0 ? gapLen - GLDiff : 0;
- int max = gapLen + GLDiff < 10 ? 10 : gapLen + GLDiff;
- //count kmer number for contig1 and contig2 ends
- int len1, len2;
- len1 = CTGendLen < contig_array[ctg1->ctgID].length + origOverlap ? CTGendLen : contig_array[ctg1->ctgID].length + origOverlap;
- len2 = CTGendLen < contig_array[ctg2->ctgID].length + origOverlap ? CTGendLen : contig_array[ctg2->ctgID].length + origOverlap;
- len1 -= overlap - 1;
- len2 -= overlap - 1;
- //int pathNum = 2;
- int offset1 = 0, offset2 = 0, cut1 = 0, cut2 = 0;
- int pathNum = searchFgap ( kmerSet, ctg1, ctg2, kmerCtg1, kmerCtg2,
- origOverlap, overlap, gapSeqArray,
- len1, len2, WordFilter, &offset1, &offset2, seqGap, &cut1, &cut2 );
-
- //printf("SF: %d K %d\n",pathNum,overlap);
- if ( pathNum == 0 )
- {
- free_kmerset ( kmerSet );
- return 0;
- }
- else if ( pathNum == 1 )
- {
- free_kmerset ( kmerSet );
- return 1;
- } /*
+ /**************** put kmer in DBgraph ****************/
+ KmerSet *kmerSet;
+ Kmer WordFilter = createFilter ( overlap );
+ /*
+ if(ctg1->ctgID==56410&&ctg2->ctgID==61741)
+ printf("Extract %d reads for gap [%d %d]\n",num,ctg1->ctgID,ctg2->ctgID);
+ */
+ kmerSet = readsInGap2DBgraph ( rdArray, num, ctg1, ctg2, origOverlap, kmerCtg1, kmerCtg2, overlap, WordFilter );
+
+ if ( !kmerSet )
+ {
+ //printf("no kmer found\n");
+ return 0;
+ }
+
+ time_t tt;
+ time ( &tt );
+ //srand48 ( ( int ) tt );
+ /*
+ int i,j;
+ for(i=0;i<2;i++){
+ int array[] = {0,1,2,3};
+ for(j=4;j>0;j--)
+ fprintf(stderr,"%d ", nPick1(array,j));
+ }
+ fprintf(stderr,"\n");
+ */
+ /***************** search path to connect contig ends ********/
+ int gapLen = ctg2->start - ctg1->end - origOverlap + overlap;
+ int min = gapLen - GLDiff > 0 ? gapLen - GLDiff : 0;
+ int max = gapLen + GLDiff < 10 ? 10 : gapLen + GLDiff;
+ //count kmer number for contig1 and contig2 ends
+ int len1, len2;
+ len1 = CTGendLen < contig_array[ctg1->ctgID].length + origOverlap ? CTGendLen : contig_array[ctg1->ctgID].length + origOverlap;
+ len2 = CTGendLen < contig_array[ctg2->ctgID].length + origOverlap ? CTGendLen : contig_array[ctg2->ctgID].length + origOverlap;
+ len1 -= overlap - 1;
+ len2 -= overlap - 1;
+ //int pathNum = 2;
+ int offset1 = 0, offset2 = 0, cut1 = 0, cut2 = 0;
+ int pathNum = searchFgap ( kmerSet, ctg1, ctg2, kmerCtg1, kmerCtg2,
+ origOverlap, overlap, gapSeqArray,
+ len1, len2, WordFilter, &offset1, &offset2, seqGap, &cut1, &cut2 );
+
+ //printf("SF: %d K %d\n",pathNum,overlap);
+ if ( pathNum == 0 )
+ {
+ free_kmerset ( kmerSet );
+ return 0;
+ }
+ else if ( pathNum == 1 )
+ {
+ free_kmerset ( kmerSet );
+ return 1;
+ } /*
else{
printf("ret %d\n",pathNum);
free_kmerset(kmerSet);
return 0;
} */
- /******************* cross the gap by single reads *********/
- //kmerSet_markTandem(kmerSet,WordFilter,overlap);
- maskRepeatNode ( kmerSet, kmerCtg1, kmerCtg2, overlap, len1, len2, max, WordFilter );
- boolean found = readsCrossGap ( rdArray, num, origOverlap, gapSeqArray,
- kmerCtg1, kmerCtg2, overlap, ctg1, ctg2, kmerSet, WordFilter, min, max,
- offset1, offset2, seqGap, seqCtg1, seqCtg2, cut1, cut2 );
-
- if ( found )
- {
- //fprintf(stderr,"read across\n");
- free_kmerset ( kmerSet );
- return found;
- }
- else
- {
- free_kmerset ( kmerSet );
- return 0;
- }
+ /******************* cross the gap by single reads *********/
+ //kmerSet_markTandem(kmerSet,WordFilter,overlap);
+ maskRepeatNode ( kmerSet, kmerCtg1, kmerCtg2, overlap, len1, len2, max, WordFilter );
+ boolean found = readsCrossGap ( rdArray, num, origOverlap, gapSeqArray,
+ kmerCtg1, kmerCtg2, overlap, ctg1, ctg2, kmerSet, WordFilter, min, max,
+ offset1, offset2, seqGap, seqCtg1, seqCtg2, cut1, cut2 );
+
+ if ( found )
+ {
+ //fprintf(stderr,"read across\n");
+ free_kmerset ( kmerSet );
+ return found;
+ }
+ else
+ {
+ free_kmerset ( kmerSet );
+ return 0;
+ }
}
-static void kmerSet_markTandem ( KmerSet * set, Kmer WordFilter, int overlap )
+static void kmerSet_markTandem ( KmerSet *set, Kmer WordFilter, int overlap )
{
- kmer_t * rs;
- long long counter = 0;
- int num_route, steps;
- int min = 1, max = overlap, maxRoute = 1;
- int traceCounter;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- rs = set->array + set->iter_ptr;
-
- if ( rs->inEdge > 0 )
- {
- set->iter_ptr++;
- continue;
- }
-
- num_route = traceCounter = 0;
- steps = 0;
- trace4Repeat ( rs->seq, steps, min, max, &num_route, set, rs->seq, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
-
- if ( num_route < 1 )
- {
- set->iter_ptr++;
- continue;
- }
-
- /*
- printKmerSeqLocal(stderr,rs->seq,overlap);
- fprintf(stderr, "\n");
- */
- rs->checked = 1;
- counter++;
- }
-
- set->iter_ptr++;
- }
+ kmer_t *rs;
+ long long counter = 0;
+ int num_route, steps;
+ int min = 1, max = overlap, maxRoute = 1;
+ int traceCounter;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ rs = set->array + set->iter_ptr;
+
+ if ( rs->inEdge > 0 )
+ {
+ set->iter_ptr++;
+ continue;
+ }
+
+ num_route = traceCounter = 0;
+ steps = 0;
+ trace4Repeat ( rs->seq, steps, min, max, &num_route, set, rs->seq, overlap, WordFilter, &traceCounter, maxRoute, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL );
+
+ if ( num_route < 1 )
+ {
+ set->iter_ptr++;
+ continue;
+ }
+
+ /*
+ printKmerSeqLocal(stderr,rs->seq,overlap);
+ fprintf(stderr, "\n");
+ */
+ rs->checked = 1;
+ counter++;
+ }
+
+ set->iter_ptr++;
+ }
}
/******************* the following is for read-crossing gaps *************************/
@@ -1703,10 +1706,10 @@ static void kmerSet_markTandem ( KmerSet * set, Kmer WordFilter, int overlap )
static const int INDEL = 0;
static const int SIM[4][4] =
{
- {1, 0, 0, 0},
- {0, 1, 0, 0},
- {0, 0, 1, 0},
- {0, 0, 0, 1}
+ {1, 0, 0, 0},
+ {0, 1, 0, 0},
+ {0, 0, 1, 0},
+ {0, 0, 0, 1}
};
static char fastSequence[MAXREADLENGTH];
static char slowSequence[MAXREADLENGTH];
@@ -1717,659 +1720,659 @@ static int fastToSlowMapping[MAXREADLENGTH + 1];
static int max ( int A, int B, int C )
{
- A = A >= B ? A : B;
- return ( A >= C ? A : C );
+ A = A >= B ? A : B;
+ return ( A >= C ? A : C );
}
-static int compareSequences ( char * sequence1, char * sequence2, int length1, int length2 )
+static int compareSequences ( char *sequence1, char *sequence2, int length1, int length2 )
{
- if ( length1 < 1 || length2 < 1 || length1 > MAXREADLENGTH || length2 > MAXREADLENGTH )
- {
- return 0;
- }
-
- int i, j;
- int Choice1, Choice2, Choice3;
- int maxScore;
-
- for ( i = 0; i <= length1; i++ )
- {
- Fmatrix[i][0] = 0;
- }
-
- for ( j = 0; j <= length2; j++ )
- {
- Fmatrix[0][j] = 0;
- }
-
- for ( i = 1; i <= length1; i++ )
- {
- for ( j = 1; j <= length2; j++ )
- {
- Choice1 = Fmatrix[i - 1][j - 1] + SIM[ ( int ) sequence1[i - 1]][ ( int ) sequence2[j - 1]];
- Choice2 = Fmatrix[i - 1][j] + INDEL;
- Choice3 = Fmatrix[i][j - 1] + INDEL;
- Fmatrix[i][j] = max ( Choice1, Choice2, Choice3 );
- }
- }
-
- maxScore = Fmatrix[length1][length2];
- return maxScore;
+ if ( length1 < 1 || length2 < 1 || length1 > MAXREADLENGTH || length2 > MAXREADLENGTH )
+ {
+ return 0;
+ }
+
+ int i, j;
+ int Choice1, Choice2, Choice3;
+ int maxScore;
+
+ for ( i = 0; i <= length1; i++ )
+ {
+ Fmatrix[i][0] = 0;
+ }
+
+ for ( j = 0; j <= length2; j++ )
+ {
+ Fmatrix[0][j] = 0;
+ }
+
+ for ( i = 1; i <= length1; i++ )
+ {
+ for ( j = 1; j <= length2; j++ )
+ {
+ Choice1 = Fmatrix[i - 1][j - 1] + SIM[ ( int ) sequence1[i - 1]][ ( int ) sequence2[j - 1]];
+ Choice2 = Fmatrix[i - 1][j] + INDEL;
+ Choice3 = Fmatrix[i][j - 1] + INDEL;
+ Fmatrix[i][j] = max ( Choice1, Choice2, Choice3 );
+ }
+ }
+
+ maxScore = Fmatrix[length1][length2];
+ return maxScore;
}
static void mapSlowOntoFast ( int slowSeqLength, int fastSeqLength )
{
- int slowIndex = slowSeqLength;
- int fastIndex = fastSeqLength;
- int fastn, slown;
-
- if ( slowIndex == 0 )
- {
- slowToFastMapping[0] = fastIndex;
-
- while ( fastIndex >= 0 )
- {
- fastToSlowMapping[fastIndex--] = 0;
- }
-
- return;
- }
-
- if ( fastIndex == 0 )
- {
- while ( slowIndex >= 0 )
- {
- slowToFastMapping[slowIndex--] = 0;
- }
-
- fastToSlowMapping[0] = slowIndex;
- return;
- }
-
- while ( slowIndex > 0 && fastIndex > 0 )
- {
- fastn = ( int ) fastSequence[fastIndex - 1]; //getCharInTightString(fastSequence,fastIndex-1);
- slown = ( int ) slowSequence[slowIndex - 1]; //getCharInTightString(slowSequence,slowIndex-1);
-
- if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex - 1] + SIM[fastn][slown] )
- {
- fastToSlowMapping[--fastIndex] = --slowIndex;
- slowToFastMapping[slowIndex] = fastIndex;
- }
- else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex] + INDEL )
- {
- fastToSlowMapping[--fastIndex] = slowIndex - 1;
- }
- else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex][slowIndex - 1] + INDEL )
- {
- slowToFastMapping[--slowIndex] = fastIndex - 1;
- }
- else
- {
- fprintf ( stderr, "CompareSequence: Error trace.\n" );
- abort ();
- }
- }
-
- while ( slowIndex > 0 )
- {
- slowToFastMapping[--slowIndex] = -1;
- }
-
- while ( fastIndex > 0 )
- {
- fastToSlowMapping[--fastIndex] = -1;
- }
-
- slowToFastMapping[slowSeqLength] = fastSeqLength;
- fastToSlowMapping[fastSeqLength] = slowSeqLength;
+ int slowIndex = slowSeqLength;
+ int fastIndex = fastSeqLength;
+ int fastn, slown;
+
+ if ( slowIndex == 0 )
+ {
+ slowToFastMapping[0] = fastIndex;
+
+ while ( fastIndex >= 0 )
+ {
+ fastToSlowMapping[fastIndex--] = 0;
+ }
+
+ return;
+ }
+
+ if ( fastIndex == 0 )
+ {
+ while ( slowIndex >= 0 )
+ {
+ slowToFastMapping[slowIndex--] = 0;
+ }
+
+ fastToSlowMapping[0] = slowIndex;
+ return;
+ }
+
+ while ( slowIndex > 0 && fastIndex > 0 )
+ {
+ fastn = ( int ) fastSequence[fastIndex - 1]; //getCharInTightString(fastSequence,fastIndex-1);
+ slown = ( int ) slowSequence[slowIndex - 1]; //getCharInTightString(slowSequence,slowIndex-1);
+
+ if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex - 1] + SIM[fastn][slown] )
+ {
+ fastToSlowMapping[--fastIndex] = --slowIndex;
+ slowToFastMapping[slowIndex] = fastIndex;
+ }
+ else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex - 1][slowIndex] + INDEL )
+ {
+ fastToSlowMapping[--fastIndex] = slowIndex - 1;
+ }
+ else if ( Fmatrix[fastIndex][slowIndex] == Fmatrix[fastIndex][slowIndex - 1] + INDEL )
+ {
+ slowToFastMapping[--slowIndex] = fastIndex - 1;
+ }
+ else
+ {
+ fprintf ( stderr, "CompareSequence: Error trace.\n" );
+ abort ();
+ }
+ }
+
+ while ( slowIndex > 0 )
+ {
+ slowToFastMapping[--slowIndex] = -1;
+ }
+
+ while ( fastIndex > 0 )
+ {
+ fastToSlowMapping[--fastIndex] = -1;
+ }
+
+ slowToFastMapping[slowSeqLength] = fastSeqLength;
+ fastToSlowMapping[fastSeqLength] = slowSeqLength;
}
-static boolean chopReadFillGap ( int len_seq, int overlap, char * src_seq, char * bal_seq,
- KmerSet * kset, Kmer WORDF, int * start, int * end, boolean * bal,
- Kmer * KmerCtg1, int len1, Kmer * KmerCtg2, int len2, int * index1, int * index2 )
+static boolean chopReadFillGap ( int len_seq, int overlap, char *src_seq, char *bal_seq,
+ KmerSet *kset, Kmer WORDF, int *start, int *end, boolean *bal,
+ Kmer *KmerCtg1, int len1, Kmer *KmerCtg2, int len2, int *index1, int *index2 )
{
- int index, j = 0, bal_j;
- Kmer word, bal_word;
- int flag = 0, bal_flag = 0;
- int ctg1start, bal_ctg1start, ctg2end, bal_ctg2end;
- int seqStart, bal_start, seqEnd, bal_end;
- kmer_t * node;
- boolean found;
-
- if ( len_seq < overlap + 1 )
- {
- return 0;
- }
+ int index, j = 0, bal_j;
+ Kmer word, bal_word;
+ int flag = 0, bal_flag = 0;
+ int ctg1start, bal_ctg1start, ctg2end, bal_ctg2end;
+ int seqStart, bal_start, seqEnd, bal_end;
+ kmer_t *node;
+ boolean found;
+
+ if ( len_seq < overlap + 1 )
+ {
+ return 0;
+ }
#ifdef MER127
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word.high = word.low = 0;
+ word.high = word.low = 0;
- for ( index = 0; index < overlap; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlap; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlap );
- bal_j = len_seq - 0 - overlap; // 0;
- flag = bal_flag = 0;
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- found = search_kmerset ( kset, word, &node );
- }
- else
- {
- found = search_kmerset ( kset, bal_word, &node );
- }
-
- //if ( !found ) { printf ( "chopReadFillGap 1292 not found!\n" ); }
-
- if ( found && !node->linear && !node->checked )
- {
- if ( !flag && node->inEdge == 1 )
- {
- ctg1start = searchKmerOnCtg ( word, KmerCtg1, len1 );
-
- if ( ctg1start >= 0 )
- {
- flag = 1;
- seqStart = j + overlap - 1;
- }
- }
-
- if ( !bal_flag && node->inEdge == 2 )
- {
- bal_ctg2end = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
-
- if ( bal_ctg2end >= 0 )
- {
- bal_flag = 2;
- bal_end = bal_j + overlap - 1;
- }
- }
- }
-
- for ( j = 1; j <= len_seq - overlap; j++ )
- {
- word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
- bal_j = len_seq - j - overlap; // j;
- bal_word = prevKmerLocal ( bal_word, bal_seq[bal_j], overlap );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- found = search_kmerset ( kset, word, &node );
- }
- else
- {
- found = search_kmerset ( kset, bal_word, &node );
- }
-
- //if ( !found ) { printf ( "chopReadFillGap 1321 not found!\n" ); }
-
- if ( found && !node->linear && !node->checked )
- {
- if ( !flag && node->inEdge == 1 )
- {
- ctg1start = searchKmerOnCtg ( word, KmerCtg1, len1 );
-
- if ( ctg1start >= 0 )
- {
- flag = 1;
- seqStart = j + overlap - 1;
- }
- }
- else if ( flag == 1 && node->inEdge == 1 )
- {
- index = searchKmerOnCtg ( word, KmerCtg1, len1 );
-
- if ( index >= 0 && index > ctg1start ) // choose hit closer to gap
- {
- ctg1start = index;
- seqStart = j + overlap - 1;
- }
- }
- else if ( flag == 1 && node->inEdge == 2 )
- {
- ctg2end = searchKmerOnCtg ( word, KmerCtg2, len2 );
-
- if ( ctg2end >= 0 )
- {
- flag = 3;
- seqEnd = j + overlap - 1;
- break;
- }
- }
-
- if ( !bal_flag && node->inEdge == 2 )
- {
- bal_ctg2end = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
-
- if ( bal_ctg2end >= 0 )
- {
- bal_flag = 2;
- bal_end = bal_j + overlap - 1;
- }
- }
- else if ( bal_flag == 2 && node->inEdge == 2 )
- {
- index = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
-
- if ( index >= 0 && index < bal_ctg2end ) // choose hit closer to gap
- {
- bal_ctg2end = index;
- bal_end = bal_j + overlap - 1;
- }
- }
- else if ( bal_flag == 2 && node->inEdge == 1 )
- {
- bal_ctg1start = searchKmerOnCtg ( bal_word, KmerCtg1, len1 );
-
- if ( bal_ctg1start >= 0 )
- {
- bal_flag = 3;
- bal_start = bal_j + overlap - 1;
- break;
- }
- }
- }
- }
-
- if ( flag == 3 )
- {
- *start = seqStart;
- *end = seqEnd;
- *bal = 0;
- *index1 = ctg1start;
- *index2 = ctg2end;
- return 1;
- }
- else if ( bal_flag == 3 )
- {
- *start = bal_start;
- *end = bal_end;
- *bal = 1;
- *index1 = bal_ctg1start;
- *index2 = bal_ctg2end;
- return 1;
- }
-
- return 0;
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlap );
+ bal_j = len_seq - 0 - overlap; // 0;
+ flag = bal_flag = 0;
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset ( kset, word, &node );
+ }
+ else
+ {
+ found = search_kmerset ( kset, bal_word, &node );
+ }
+
+ //if ( !found ) { printf ( "chopReadFillGap 1292 not found!\n" ); }
+
+ if ( found && !node->linear && !node->checked )
+ {
+ if ( !flag && node->inEdge == 1 )
+ {
+ ctg1start = searchKmerOnCtg ( word, KmerCtg1, len1 );
+
+ if ( ctg1start >= 0 )
+ {
+ flag = 1;
+ seqStart = j + overlap - 1;
+ }
+ }
+
+ if ( !bal_flag && node->inEdge == 2 )
+ {
+ bal_ctg2end = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
+
+ if ( bal_ctg2end >= 0 )
+ {
+ bal_flag = 2;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ }
+
+ for ( j = 1; j <= len_seq - overlap; j++ )
+ {
+ word = nextKmerLocal ( word, src_seq[j - 1 + overlap], WORDF );
+ bal_j = len_seq - j - overlap; // j;
+ bal_word = prevKmerLocal ( bal_word, bal_seq[bal_j], overlap );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ found = search_kmerset ( kset, word, &node );
+ }
+ else
+ {
+ found = search_kmerset ( kset, bal_word, &node );
+ }
+
+ //if ( !found ) { printf ( "chopReadFillGap 1321 not found!\n" ); }
+
+ if ( found && !node->linear && !node->checked )
+ {
+ if ( !flag && node->inEdge == 1 )
+ {
+ ctg1start = searchKmerOnCtg ( word, KmerCtg1, len1 );
+
+ if ( ctg1start >= 0 )
+ {
+ flag = 1;
+ seqStart = j + overlap - 1;
+ }
+ }
+ else if ( flag == 1 && node->inEdge == 1 )
+ {
+ index = searchKmerOnCtg ( word, KmerCtg1, len1 );
+
+ if ( index >= 0 && index > ctg1start ) // choose hit closer to gap
+ {
+ ctg1start = index;
+ seqStart = j + overlap - 1;
+ }
+ }
+ else if ( flag == 1 && node->inEdge == 2 )
+ {
+ ctg2end = searchKmerOnCtg ( word, KmerCtg2, len2 );
+
+ if ( ctg2end >= 0 )
+ {
+ flag = 3;
+ seqEnd = j + overlap - 1;
+ break;
+ }
+ }
+
+ if ( !bal_flag && node->inEdge == 2 )
+ {
+ bal_ctg2end = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
+
+ if ( bal_ctg2end >= 0 )
+ {
+ bal_flag = 2;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ else if ( bal_flag == 2 && node->inEdge == 2 )
+ {
+ index = searchKmerOnCtg ( bal_word, KmerCtg2, len2 );
+
+ if ( index >= 0 && index < bal_ctg2end ) // choose hit closer to gap
+ {
+ bal_ctg2end = index;
+ bal_end = bal_j + overlap - 1;
+ }
+ }
+ else if ( bal_flag == 2 && node->inEdge == 1 )
+ {
+ bal_ctg1start = searchKmerOnCtg ( bal_word, KmerCtg1, len1 );
+
+ if ( bal_ctg1start >= 0 )
+ {
+ bal_flag = 3;
+ bal_start = bal_j + overlap - 1;
+ break;
+ }
+ }
+ }
+ }
+
+ if ( flag == 3 )
+ {
+ *start = seqStart;
+ *end = seqEnd;
+ *bal = 0;
+ *index1 = ctg1start;
+ *index2 = ctg2end;
+ return 1;
+ }
+ else if ( bal_flag == 3 )
+ {
+ *start = bal_start;
+ *end = bal_end;
+ *bal = 1;
+ *index1 = bal_ctg1start;
+ *index2 = bal_ctg2end;
+ return 1;
+ }
+
+ return 0;
}
-static int cutSeqFromTightStr ( char * tightStr, int length, int start, int end, int revS, char * src_seq )
+static int cutSeqFromTightStr ( char *tightStr, int length, int start, int end, int revS, char *src_seq )
{
- int i, index = 0;
- end = end < length ? end : length - 1;
- start = start >= 0 ? start : 0;
-
- if ( !revS )
- {
- for ( i = start; i <= end; i++ )
- {
- src_seq[index++] = getCharInTightString ( tightStr, i );
- }
- }
- else
- {
- for ( i = length - 1 - start; i >= length - end - 1; i-- )
- {
- src_seq[index++] = int_comp ( getCharInTightString ( tightStr, i ) );
- }
- }
-
- return end - start + 1;
+ int i, index = 0;
+ end = end < length ? end : length - 1;
+ start = start >= 0 ? start : 0;
+
+ if ( !revS )
+ {
+ for ( i = start; i <= end; i++ )
+ {
+ src_seq[index++] = getCharInTightString ( tightStr, i );
+ }
+ }
+ else
+ {
+ for ( i = length - 1 - start; i >= length - end - 1; i-- )
+ {
+ src_seq[index++] = int_comp ( getCharInTightString ( tightStr, i ) );
+ }
+ }
+
+ return end - start + 1;
}
-static int cutSeqFromCtg ( unsigned int ctgID, int start, int end, char * sequence, int originOverlap )
+static int cutSeqFromCtg ( unsigned int ctgID, int start, int end, char *sequence, int originOverlap )
{
- unsigned int bal_ctg = getTwinCtg ( ctgID );
-
- if ( contig_array[ctgID].length < 1 )
- {
- return 0;
- }
-
- int length = contig_array[ctgID].length + originOverlap;
-
- if ( contig_array[ctgID].seq )
- {
- return cutSeqFromTightStr ( contig_array[ctgID].seq, length, start, end, 0, sequence );
- }
- else
- {
- return cutSeqFromTightStr ( contig_array[bal_ctg].seq, length, start, end, 1, sequence );
- }
+ unsigned int bal_ctg = getTwinCtg ( ctgID );
+
+ if ( contig_array[ctgID].length < 1 )
+ {
+ return 0;
+ }
+
+ int length = contig_array[ctgID].length + originOverlap;
+
+ if ( contig_array[ctgID].seq )
+ {
+ return cutSeqFromTightStr ( contig_array[ctgID].seq, length, start, end, 0, sequence );
+ }
+ else
+ {
+ return cutSeqFromTightStr ( contig_array[bal_ctg].seq, length, start, end, 1, sequence );
+ }
}
-static int cutSeqFromRead ( char * src_seq, int length, int start, int end, char * sequence )
+static int cutSeqFromRead ( char *src_seq, int length, int start, int end, char *sequence )
{
- if ( end >= length )
- {
- fprintf ( stderr, "The index is bigger than the length: end %d length %d.\n", end, length );
- }
+ if ( end >= length )
+ {
+ fprintf ( stderr, "The index is bigger than the length: end %d length %d.\n", end, length );
+ }
- end = end < length ? end : length - 1;
- start = start >= 0 ? start : 0;
- int i;
+ end = end < length ? end : length - 1;
+ start = start >= 0 ? start : 0;
+ int i;
- for ( i = start; i <= end; i++ )
- {
- sequence[i - start] = src_seq[i];
- }
+ for ( i = start; i <= end; i++ )
+ {
+ sequence[i - start] = src_seq[i];
+ }
- return end - start + 1;
+ return end - start + 1;
}
-void printSeq ( FILE * fo, char * seq, int len )
+void printSeq ( FILE *fo, char *seq, int len )
{
- int i;
+ int i;
- for ( i = 0; i < len; i++ )
- {
- fprintf ( fo, "%c", int2base ( ( int ) seq[i] ) );
- }
+ for ( i = 0; i < len; i++ )
+ {
+ fprintf ( fo, "%c", int2base ( ( int ) seq[i] ) );
+ }
- fprintf ( fo, "\n" );
+ fprintf ( fo, "\n" );
}
-static boolean readsCrossGap ( READNEARBY * rdArray, int num, int originOverlap, DARRAY * gapSeqArray,
- Kmer * kmerCtg1, Kmer * kmerCtg2, int overlap, CTGinSCAF * ctg1, CTGinSCAF * ctg2,
- KmerSet * kmerS, Kmer WordFilter, int min, int max, int offset1, int offset2,
- char * seqGap, char * seqCtg1, char * seqCtg2, int cut1, int cut2 )
+static boolean readsCrossGap ( READNEARBY *rdArray, int num, int originOverlap, DARRAY *gapSeqArray,
+ Kmer *kmerCtg1, Kmer *kmerCtg2, int overlap, CTGinSCAF *ctg1, CTGinSCAF *ctg2,
+ KmerSet *kmerS, Kmer WordFilter, int min, int max, int offset1, int offset2,
+ char *seqGap, char *seqCtg1, char *seqCtg2, int cut1, int cut2 )
{
- int i, j, start, end, startOnCtg1, endOnCtg2;
- char * bal_seq;
- char * src_seq;
- char * pt;
- boolean bal, ret = 0, FILL;
- double maxScore = 0.0;
- int maxIndex;
- int lenCtg1, lenCtg2;
- //build sequences on left and right of the uncertain region
- int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
- int length = contig_array[ctg1->ctgID].length + originOverlap;
-
- if ( buffer_size > offset1 )
- {
- lenCtg1 = cutSeqFromCtg ( ctg1->ctgID, length - cut1 - ( buffer_size - offset1 ), length - 1 - cut1, seqCtg1, originOverlap );
-
- for ( i = 0; i < offset1; i++ )
- {
- seqCtg1[lenCtg1 + i] = seqGap[i];
- }
-
- lenCtg1 += offset1;
- }
- else
- {
- for ( i = offset1 - buffer_size; i < offset1; i++ )
- {
- seqCtg1[i + buffer_size - offset1] = seqGap[i];
- }
-
- lenCtg1 = buffer_size;
- }
-
- length = contig_array[ctg2->ctgID].length + originOverlap;
-
- if ( buffer_size > offset2 )
- {
- lenCtg2 = cutSeqFromCtg ( ctg2->ctgID, cut2, buffer_size - offset2 - 1 + cut2, & ( seqCtg2[offset2] ), originOverlap );
-
- for ( i = 0; i < offset2; i++ )
- {
- seqCtg2[i] = seqGap[i + offset1];
- }
-
- lenCtg2 += offset2;
- }
- else
- {
- for ( i = 0; i < buffer_size; i++ )
- {
- seqCtg2[i] = seqGap[i + offset1];
- }
-
- lenCtg2 = buffer_size;
- }
-
- /*
- if(offset1>0||offset2>0){
- for(i=0;i<lenCtg1;i++)
- fprintf(stderr,"%c",int2base(seqCtg1[i]));
- fprintf(stderr,": CTG1\n");
- for(i=0;i<lenCtg2;i++)
- fprintf(stderr,"%c",int2base(seqCtg2[i]));
- fprintf(stderr,": CTG2\n");
- }
- */
- //chop kmer from both ends of the uncertain region
- int len1, len2;
- len1 = CTGendLen < lenCtg1 ? CTGendLen : lenCtg1;
- len2 = CTGendLen < lenCtg2 ? CTGendLen : lenCtg2;
- chopKmer4Ctg ( kmerCtg1, len1, overlap, & ( seqCtg1[lenCtg1 - len1] ), WordFilter );
- chopKmer4Ctg ( kmerCtg2, len2, overlap, seqCtg2, WordFilter );
- len1 -= overlap - 1;
- len2 -= overlap - 1;
- src_seq = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- bal_seq = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- int * START = ( int * ) ckalloc ( num * sizeof ( int ) );
- int * END = ( int * ) ckalloc ( num * sizeof ( int ) );
- int * INDEX1 = ( int * ) ckalloc ( num * sizeof ( int ) );
- int * INDEX2 = ( int * ) ckalloc ( num * sizeof ( int ) );
- double * SCORE = ( double * ) ckalloc ( num * sizeof ( double ) );
- boolean * BAL = ( boolean * ) ckalloc ( num * sizeof ( boolean ) );
- memset ( SCORE, 0, num * sizeof ( double ) );
-
- for ( i = 0; i < num; i++ )
- {
- getSeqFromRead ( rdArray[i], src_seq );
- FILL = chopReadFillGap ( rdArray[i].len, overlap, src_seq, bal_seq, kmerS, WordFilter, &start, &end, &bal, kmerCtg1, len1, kmerCtg2, len2, &startOnCtg1, &endOnCtg2 );
-
- if ( !FILL || ( end - start ) < min || ( end - start ) > max )
- {
- continue;
- }
-
- if ( overlap + ( len1 - startOnCtg1 - 1 ) + endOnCtg2 - ( end - start ) > ( int ) originOverlap )
- {
- continue;
- } // contig1 and contig2 could not overlap more than origOverlap bases
-
- START[i] = start;
- END[i] = end;
- INDEX1[i] = startOnCtg1;
- INDEX2[i] = endOnCtg2;
- BAL[i] = bal;
- int matchLen = 2 * overlap < ( end - start + overlap ) ? 2 * overlap : ( end - start + overlap );
- int match;
- int alignLen = matchLen;
- //compare the left of hit kmer on ctg1
- //int ctgLeft = (contig_array[ctg1->ctgID].length+originOverlap)-(len1+overlap-1)+startOnCtg1;
- int ctgLeft = ( lenCtg1 ) - ( len1 + overlap - 1 ) + startOnCtg1;
- int readLeft = start - overlap + 1;
- int cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
- cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
- //cutSeqFromCtg(ctg1->ctgID,ctgLeft-cmpLen,ctgLeft-1,fastSequence,originOverlap);
- cutSeqFromRead ( seqCtg1, lenCtg1, ctgLeft - cmpLen, ctgLeft - 1, fastSequence );
-
- if ( !bal )
- {
- cutSeqFromRead ( src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
- }
- else
- {
- cutSeqFromRead ( bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
- }
-
- match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
- alignLen += cmpLen;
- matchLen += match;
- //compare the right of hit kmer on ctg1
- int ctgRight = len1 - startOnCtg1 - 1;
- cmpLen = ctgRight < ( rdArray[i].len - start - 1 ) ? ctgRight : ( rdArray[i].len - start - 1 );
- cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
- //cutSeqFromCtg(ctg1->ctgID,ctgLeft+overlap,ctgLeft+overlap+cmpLen-1,fastSequence,originOverlap);
- cutSeqFromRead ( seqCtg1, lenCtg1, ctgLeft + overlap, ctgLeft + overlap + cmpLen - 1, fastSequence );
-
- if ( !bal )
- {
- cutSeqFromRead ( src_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence );
- }
- else
- {
- cutSeqFromRead ( bal_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence );
- }
-
- match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
- //fprintf(stderr,"%d -- %d\n",match,cmpLen);
- alignLen += cmpLen;
- matchLen += match;
- //compare the left of hit kmer on ctg2
- ctgLeft = endOnCtg2;
- readLeft = end - overlap + 1;
- cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
- cmpLen = ctgLeft <= MAXREADLENGTH ? ctgLeft : MAXREADLENGTH;
- //cutSeqFromCtg(ctg2->ctgID,endOnCtg2-cmpLen,endOnCtg2-1,fastSequence,originOverlap);
- cutSeqFromRead ( seqCtg2, lenCtg2, endOnCtg2 - cmpLen, endOnCtg2 - 1, fastSequence );
-
- if ( !bal )
- {
- cutSeqFromRead ( src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
- }
- else
- {
- cutSeqFromRead ( bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
- }
-
- match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
- alignLen += cmpLen;
- matchLen += match;
- //compare the right of hit kmer on ctg2
- //ctgRight = contig_array[ctg2->ctgID].length+originOverlap-endOnCtg2-overlap;
- ctgRight = lenCtg2 - endOnCtg2 - overlap;
- cmpLen = ctgRight < ( rdArray[i].len - end - 1 ) ? ctgRight : ( rdArray[i].len - end - 1 );
- cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
- //cutSeqFromCtg(ctg2->ctgID,endOnCtg2+overlap,endOnCtg2+overlap+cmpLen-1,fastSequence,originOverlap);
- cutSeqFromRead ( seqCtg2, lenCtg2, endOnCtg2 + overlap, endOnCtg2 + overlap + cmpLen - 1, fastSequence );
-
- if ( !bal )
- {
- cutSeqFromRead ( src_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence );
- }
- else
- {
- cutSeqFromRead ( bal_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence );
- }
-
- match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
- alignLen += cmpLen;
- matchLen += match;
- /*
- if(cmpLen>0&&match!=cmpLen+overlap){
- printSeq(stderr,fastSequence,cmpLen+overlap);
- printSeq(stderr,slowSequence,cmpLen+overlap);
- printKmerSeqLocal(stderr,kmerCtg2[endOnCtg2],overlap);
- fprintf(stderr,": %d(%d)\n",bal,endOnCtg2);
- }else if(cmpLen>0&&match==cmpLen+overlap)
- fprintf(stderr,"Perfect\n");
- */
- double score = ( double ) matchLen / alignLen;
-
- if ( maxScore < score )
- {
- maxScore = score;
- //fprintf(stderr,"%4.2f (%d/%d)\n",maxScore,matchLen,alignLen);
- maxIndex = i;
- }
-
- SCORE[i] = score;
- }
-
- /*
- if(maxScore>0.0)
- fprintf(stderr,"SCORE: %4.2f\n",maxScore);
- */
- if ( maxScore > 0.9 )
- {
- /*
- for(i=0;i<lenCtg1;i++)
- fprintf(stderr,"%c",int2base(seqCtg1[i]));
- fprintf(stderr,": CTG1\n");
- for(i=0;i<lenCtg2;i++)
- fprintf(stderr,"%c",int2base(seqCtg2[i]));
- fprintf(stderr,": CTG2\n");
- fprintf(stderr,"%d+%d -- %d+%d, SCORE: %4.2f\n ",offset1,offset2,cut1,cut2,maxScore);
- */
- getSeqFromRead ( rdArray[maxIndex], src_seq );
- reverseComplementSeq ( src_seq, rdArray[maxIndex].len, bal_seq );
- int leftRemain = offset1 - ( len1 - INDEX1[maxIndex] - 1 ) > 0 ? offset1 - ( len1 - INDEX1[maxIndex] - 1 ) : 0;
- int rightRemain = offset2 - ( overlap + INDEX2[maxIndex] ) > 0 ? offset2 - ( overlap + INDEX2[maxIndex] ) : 0;
- ctg1->gapSeqOffset = gapSeqArray->item_c;
- ctg1->gapSeqLen = END[maxIndex] - START[maxIndex] + leftRemain + rightRemain;
-
- if ( darrayPut ( gapSeqArray, ctg1->gapSeqOffset + ( END[maxIndex] - START[maxIndex] + leftRemain + rightRemain ) / 4 ) )
- {
- pt = ( char * ) darrayPut ( gapSeqArray, ctg1->gapSeqOffset );
-
- for ( j = 0; j < leftRemain; j++ ) //get the left side of the gap region from search
- {
- writeChar2tightString ( seqGap[j], pt, j );
- //fprintf(stderr,"%c",int2base(seqGap[j]));
- }
-
- for ( j = START[maxIndex] + 1; j <= END[maxIndex]; j++ )
- {
- if ( BAL[maxIndex] )
- {
- writeChar2tightString ( bal_seq[j], pt, j - START[maxIndex] - 1 + leftRemain );
- //fprintf(stderr,"%c",int2base(bal_seq[j]));
- }
- else
- {
- writeChar2tightString ( src_seq[j], pt, j - START[maxIndex] - 1 + leftRemain );
- //fprintf(stderr,"%c",int2base(src_seq[j]));
- }
- }
-
- for ( j = offset2 - rightRemain; j < offset2; j++ ) //get the right side of the gap region from search
- {
- writeChar2tightString ( seqGap[j + leftRemain], pt, j + END[maxIndex] - START[maxIndex] + leftRemain );
- //fprintf(stderr,"%c",int2base(seqGap[j+leftRemain]));
- }
-
- /*
- fprintf(stderr,": GAPSEQ (%d+%d)(%d+%d)(%d+%d)(%d+%d) B %d\n",offset1,offset2,cut1,cut2,
- len1-INDEX1[maxIndex]-1,INDEX2[maxIndex],START[maxIndex],END[maxIndex],BAL[maxIndex]);
- */
- ctg1->cutTail = len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 > cut1 ? len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 : cut1;
- ctg2->cutHead = overlap + INDEX2[maxIndex] - offset2 + cut2 > cut2 ? overlap + INDEX2[maxIndex] - offset2 + cut2 : cut2;
- ctg2->scaftig_start = 0;
- ret = 1;
- }
- }
-
- free ( ( void * ) START );
- free ( ( void * ) END );
- free ( ( void * ) INDEX1 );
- free ( ( void * ) INDEX2 );
- free ( ( void * ) SCORE );
- free ( ( void * ) BAL );
- free ( ( void * ) src_seq );
- free ( ( void * ) bal_seq );
- return ret;
+ int i, j, start, end, startOnCtg1, endOnCtg2;
+ char *bal_seq;
+ char *src_seq;
+ char *pt;
+ boolean bal, ret = 0, FILL;
+ double maxScore = 0.0;
+ int maxIndex;
+ int lenCtg1, lenCtg2;
+ //build sequences on left and right of the uncertain region
+ int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
+ int length = contig_array[ctg1->ctgID].length + originOverlap;
+
+ if ( buffer_size > offset1 )
+ {
+ lenCtg1 = cutSeqFromCtg ( ctg1->ctgID, length - cut1 - ( buffer_size - offset1 ), length - 1 - cut1, seqCtg1, originOverlap );
+
+ for ( i = 0; i < offset1; i++ )
+ {
+ seqCtg1[lenCtg1 + i] = seqGap[i];
+ }
+
+ lenCtg1 += offset1;
+ }
+ else
+ {
+ for ( i = offset1 - buffer_size; i < offset1; i++ )
+ {
+ seqCtg1[i + buffer_size - offset1] = seqGap[i];
+ }
+
+ lenCtg1 = buffer_size;
+ }
+
+ length = contig_array[ctg2->ctgID].length + originOverlap;
+
+ if ( buffer_size > offset2 )
+ {
+ lenCtg2 = cutSeqFromCtg ( ctg2->ctgID, cut2, buffer_size - offset2 - 1 + cut2, & ( seqCtg2[offset2] ), originOverlap );
+
+ for ( i = 0; i < offset2; i++ )
+ {
+ seqCtg2[i] = seqGap[i + offset1];
+ }
+
+ lenCtg2 += offset2;
+ }
+ else
+ {
+ for ( i = 0; i < buffer_size; i++ )
+ {
+ seqCtg2[i] = seqGap[i + offset1];
+ }
+
+ lenCtg2 = buffer_size;
+ }
+
+ /*
+ if(offset1>0||offset2>0){
+ for(i=0;i<lenCtg1;i++)
+ fprintf(stderr,"%c",int2base(seqCtg1[i]));
+ fprintf(stderr,": CTG1\n");
+ for(i=0;i<lenCtg2;i++)
+ fprintf(stderr,"%c",int2base(seqCtg2[i]));
+ fprintf(stderr,": CTG2\n");
+ }
+ */
+ //chop kmer from both ends of the uncertain region
+ int len1, len2;
+ len1 = CTGendLen < lenCtg1 ? CTGendLen : lenCtg1;
+ len2 = CTGendLen < lenCtg2 ? CTGendLen : lenCtg2;
+ chopKmer4Ctg ( kmerCtg1, len1, overlap, & ( seqCtg1[lenCtg1 - len1] ), WordFilter );
+ chopKmer4Ctg ( kmerCtg2, len2, overlap, seqCtg2, WordFilter );
+ len1 -= overlap - 1;
+ len2 -= overlap - 1;
+ src_seq = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ bal_seq = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ int *START = ( int * ) ckalloc ( num * sizeof ( int ) );
+ int *END = ( int * ) ckalloc ( num * sizeof ( int ) );
+ int *INDEX1 = ( int * ) ckalloc ( num * sizeof ( int ) );
+ int *INDEX2 = ( int * ) ckalloc ( num * sizeof ( int ) );
+ double *SCORE = ( double * ) ckalloc ( num * sizeof ( double ) );
+ boolean *BAL = ( boolean * ) ckalloc ( num * sizeof ( boolean ) );
+ memset ( SCORE, 0, num * sizeof ( double ) );
+
+ for ( i = 0; i < num; i++ )
+ {
+ getSeqFromRead ( rdArray[i], src_seq );
+ FILL = chopReadFillGap ( rdArray[i].len, overlap, src_seq, bal_seq, kmerS, WordFilter, &start, &end, &bal, kmerCtg1, len1, kmerCtg2, len2, &startOnCtg1, &endOnCtg2 );
+
+ if ( !FILL || ( end - start ) < min || ( end - start ) > max )
+ {
+ continue;
+ }
+
+ if ( overlap + ( len1 - startOnCtg1 - 1 ) + endOnCtg2 - ( end - start ) > ( int ) originOverlap )
+ {
+ continue;
+ } // contig1 and contig2 could not overlap more than origOverlap bases
+
+ START[i] = start;
+ END[i] = end;
+ INDEX1[i] = startOnCtg1;
+ INDEX2[i] = endOnCtg2;
+ BAL[i] = bal;
+ int matchLen = 2 * overlap < ( end - start + overlap ) ? 2 * overlap : ( end - start + overlap );
+ int match;
+ int alignLen = matchLen;
+ //compare the left of hit kmer on ctg1
+ //int ctgLeft = (contig_array[ctg1->ctgID].length+originOverlap)-(len1+overlap-1)+startOnCtg1;
+ int ctgLeft = ( lenCtg1 ) - ( len1 + overlap - 1 ) + startOnCtg1;
+ int readLeft = start - overlap + 1;
+ int cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
+ cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+ //cutSeqFromCtg(ctg1->ctgID,ctgLeft-cmpLen,ctgLeft-1,fastSequence,originOverlap);
+ cutSeqFromRead ( seqCtg1, lenCtg1, ctgLeft - cmpLen, ctgLeft - 1, fastSequence );
+
+ if ( !bal )
+ {
+ cutSeqFromRead ( src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
+ }
+ else
+ {
+ cutSeqFromRead ( bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
+ }
+
+ match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
+ alignLen += cmpLen;
+ matchLen += match;
+ //compare the right of hit kmer on ctg1
+ int ctgRight = len1 - startOnCtg1 - 1;
+ cmpLen = ctgRight < ( rdArray[i].len - start - 1 ) ? ctgRight : ( rdArray[i].len - start - 1 );
+ cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+ //cutSeqFromCtg(ctg1->ctgID,ctgLeft+overlap,ctgLeft+overlap+cmpLen-1,fastSequence,originOverlap);
+ cutSeqFromRead ( seqCtg1, lenCtg1, ctgLeft + overlap, ctgLeft + overlap + cmpLen - 1, fastSequence );
+
+ if ( !bal )
+ {
+ cutSeqFromRead ( src_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence );
+ }
+ else
+ {
+ cutSeqFromRead ( bal_seq, rdArray[i].len, start + 1, start + cmpLen, slowSequence );
+ }
+
+ match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
+ //fprintf(stderr,"%d -- %d\n",match,cmpLen);
+ alignLen += cmpLen;
+ matchLen += match;
+ //compare the left of hit kmer on ctg2
+ ctgLeft = endOnCtg2;
+ readLeft = end - overlap + 1;
+ cmpLen = ctgLeft < readLeft ? ctgLeft : readLeft;
+ cmpLen = ctgLeft <= MAXREADLENGTH ? ctgLeft : MAXREADLENGTH;
+ //cutSeqFromCtg(ctg2->ctgID,endOnCtg2-cmpLen,endOnCtg2-1,fastSequence,originOverlap);
+ cutSeqFromRead ( seqCtg2, lenCtg2, endOnCtg2 - cmpLen, endOnCtg2 - 1, fastSequence );
+
+ if ( !bal )
+ {
+ cutSeqFromRead ( src_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
+ }
+ else
+ {
+ cutSeqFromRead ( bal_seq, rdArray[i].len, readLeft - cmpLen, readLeft - 1, slowSequence );
+ }
+
+ match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
+ alignLen += cmpLen;
+ matchLen += match;
+ //compare the right of hit kmer on ctg2
+ //ctgRight = contig_array[ctg2->ctgID].length+originOverlap-endOnCtg2-overlap;
+ ctgRight = lenCtg2 - endOnCtg2 - overlap;
+ cmpLen = ctgRight < ( rdArray[i].len - end - 1 ) ? ctgRight : ( rdArray[i].len - end - 1 );
+ cmpLen = cmpLen <= MAXREADLENGTH ? cmpLen : MAXREADLENGTH;
+ //cutSeqFromCtg(ctg2->ctgID,endOnCtg2+overlap,endOnCtg2+overlap+cmpLen-1,fastSequence,originOverlap);
+ cutSeqFromRead ( seqCtg2, lenCtg2, endOnCtg2 + overlap, endOnCtg2 + overlap + cmpLen - 1, fastSequence );
+
+ if ( !bal )
+ {
+ cutSeqFromRead ( src_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence );
+ }
+ else
+ {
+ cutSeqFromRead ( bal_seq, rdArray[i].len, end + 1, end + cmpLen, slowSequence );
+ }
+
+ match = compareSequences ( fastSequence, slowSequence, cmpLen, cmpLen );
+ alignLen += cmpLen;
+ matchLen += match;
+ /*
+ if(cmpLen>0&&match!=cmpLen+overlap){
+ printSeq(stderr,fastSequence,cmpLen+overlap);
+ printSeq(stderr,slowSequence,cmpLen+overlap);
+ printKmerSeqLocal(stderr,kmerCtg2[endOnCtg2],overlap);
+ fprintf(stderr,": %d(%d)\n",bal,endOnCtg2);
+ }else if(cmpLen>0&&match==cmpLen+overlap)
+ fprintf(stderr,"Perfect\n");
+ */
+ double score = ( double ) matchLen / alignLen;
+
+ if ( maxScore < score )
+ {
+ maxScore = score;
+ //fprintf(stderr,"%4.2f (%d/%d)\n",maxScore,matchLen,alignLen);
+ maxIndex = i;
+ }
+
+ SCORE[i] = score;
+ }
+
+ /*
+ if(maxScore>0.0)
+ fprintf(stderr,"SCORE: %4.2f\n",maxScore);
+ */
+ if ( maxScore > 0.9 )
+ {
+ /*
+ for(i=0;i<lenCtg1;i++)
+ fprintf(stderr,"%c",int2base(seqCtg1[i]));
+ fprintf(stderr,": CTG1\n");
+ for(i=0;i<lenCtg2;i++)
+ fprintf(stderr,"%c",int2base(seqCtg2[i]));
+ fprintf(stderr,": CTG2\n");
+ fprintf(stderr,"%d+%d -- %d+%d, SCORE: %4.2f\n ",offset1,offset2,cut1,cut2,maxScore);
+ */
+ getSeqFromRead ( rdArray[maxIndex], src_seq );
+ reverseComplementSeq ( src_seq, rdArray[maxIndex].len, bal_seq );
+ int leftRemain = offset1 - ( len1 - INDEX1[maxIndex] - 1 ) > 0 ? offset1 - ( len1 - INDEX1[maxIndex] - 1 ) : 0;
+ int rightRemain = offset2 - ( overlap + INDEX2[maxIndex] ) > 0 ? offset2 - ( overlap + INDEX2[maxIndex] ) : 0;
+ ctg1->gapSeqOffset = gapSeqArray->item_c;
+ ctg1->gapSeqLen = END[maxIndex] - START[maxIndex] + leftRemain + rightRemain;
+
+ if ( darrayPut ( gapSeqArray, ctg1->gapSeqOffset + ( END[maxIndex] - START[maxIndex] + leftRemain + rightRemain ) / 4 ) )
+ {
+ pt = ( char * ) darrayPut ( gapSeqArray, ctg1->gapSeqOffset );
+
+ for ( j = 0; j < leftRemain; j++ ) //get the left side of the gap region from search
+ {
+ writeChar2tightString ( seqGap[j], pt, j );
+ //fprintf(stderr,"%c",int2base(seqGap[j]));
+ }
+
+ for ( j = START[maxIndex] + 1; j <= END[maxIndex]; j++ )
+ {
+ if ( BAL[maxIndex] )
+ {
+ writeChar2tightString ( bal_seq[j], pt, j - START[maxIndex] - 1 + leftRemain );
+ //fprintf(stderr,"%c",int2base(bal_seq[j]));
+ }
+ else
+ {
+ writeChar2tightString ( src_seq[j], pt, j - START[maxIndex] - 1 + leftRemain );
+ //fprintf(stderr,"%c",int2base(src_seq[j]));
+ }
+ }
+
+ for ( j = offset2 - rightRemain; j < offset2; j++ ) //get the right side of the gap region from search
+ {
+ writeChar2tightString ( seqGap[j + leftRemain], pt, j + END[maxIndex] - START[maxIndex] + leftRemain );
+ //fprintf(stderr,"%c",int2base(seqGap[j+leftRemain]));
+ }
+
+ /*
+ fprintf(stderr,": GAPSEQ (%d+%d)(%d+%d)(%d+%d)(%d+%d) B %d\n",offset1,offset2,cut1,cut2,
+ len1-INDEX1[maxIndex]-1,INDEX2[maxIndex],START[maxIndex],END[maxIndex],BAL[maxIndex]);
+ */
+ ctg1->cutTail = len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 > cut1 ? len1 - INDEX1[maxIndex] - 1 - offset1 + cut1 : cut1;
+ ctg2->cutHead = overlap + INDEX2[maxIndex] - offset2 + cut2 > cut2 ? overlap + INDEX2[maxIndex] - offset2 + cut2 : cut2;
+ ctg2->scaftig_start = 0;
+ ret = 1;
+ }
+ }
+
+ free ( ( void * ) START );
+ free ( ( void * ) END );
+ free ( ( void * ) INDEX1 );
+ free ( ( void * ) INDEX2 );
+ free ( ( void * ) SCORE );
+ free ( ( void * ) BAL );
+ free ( ( void * ) src_seq );
+ free ( ( void * ) bal_seq );
+ return ret;
}
diff --git a/standardPregraph/main.c b/standardPregraph/main.c
index 98170e4..e7c4c73 100644
--- a/standardPregraph/main.c
+++ b/standardPregraph/main.c
@@ -1,7 +1,7 @@
/*
* main.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,16 +26,16 @@
#include "extfunc.h"
#include "global.h"
-extern int call_pregraph ( int arc, char ** argv );
-extern int call_pregraph_sparse(int arc, char ** argv);
-extern int call_heavygraph ( int arc, char ** argv );
-extern int call_map2contig ( int arc, char ** argv );
-extern int call_scaffold ( int arc, char ** argv );
-extern int call_align ( int arc, char ** argv );
+extern int call_pregraph ( int arc, char **argv );
+extern int call_pregraph_sparse(int arc, char **argv);
+extern int call_heavygraph ( int arc, char **argv );
+extern int call_map2contig ( int arc, char **argv );
+extern int call_scaffold ( int arc, char **argv );
+extern int call_align ( int arc, char **argv );
static void display_usage ();
static void display_all_usage ();
-static void pipeline ( int argc, char ** argv );
+static void pipeline ( int argc, char **argv );
/*************************************************
Function:
@@ -56,508 +56,534 @@ Output:
Return:
None.
*************************************************/
-int main ( int argc, char ** argv )
+int main ( int argc, char **argv ) /* Entry point: dispatches on the first command-line word to one stage function, to pipeline(), or to the usage text. */
{
- crc32c_Init();
- fprintf ( stderr, "\nVersion 2.04: released on July 13th, 2012\nCompile %s\t%s\n", __DATE__, __TIME__ );
- argc--;
- argv++;
-
- if ( argc == 0 )
- {
- display_usage ();
- return 0;
- }
-
- if ( strcmp ( "pregraph", argv[0] ) == 0 )
- {
- call_pregraph ( argc, argv );
- }
- else if(strcmp ( "sparse_pregraph", argv[0] ) == 0 ){
- call_pregraph_sparse ( argc, argv );
-
- }
- else if ( strcmp ( "contig", argv[0] ) == 0 )
- {
- call_heavygraph ( argc, argv );
- }
- else if ( strcmp ( "map", argv[0] ) == 0 )
- {
- call_align ( argc, argv );
- }
- //call_map2contig(argc,argv);
- else if ( strcmp ( "scaff", argv[0] ) == 0 )
- {
- call_scaffold ( argc, argv );
- }
- else if ( strcmp ( "all", argv[0] ) == 0 )
- {
- pipeline ( argc, argv );
- }
- else
- {
- display_usage ();
- }
-
- return 0;
+ crc32c_Init();
+ fprintf ( stderr, "\nVersion 2.04: released on July 13th, 2012\nCompile %s\t%s\n", __DATE__, __TIME__ ); /* NOTE(review): banner text is unchanged by this commit although VERSION is modified — confirm it is meant to stay at 2.04 */
+ argc--; /* drop the program name so that argv[0] is the sub-command */
+ argv++;
+
+ if ( argc == 0 )
+ {
+ display_usage (); /* no sub-command was given */
+ return 0;
+ }
+
+ if ( strcmp ( "pregraph", argv[0] ) == 0 )
+ {
+ call_pregraph ( argc, argv );
+ }
+ else if(strcmp ( "sparse_pregraph", argv[0] ) == 0 )
+ {
+ call_pregraph_sparse ( argc, argv );
+
+ }
+ else if ( strcmp ( "contig", argv[0] ) == 0 )
+ {
+ call_heavygraph ( argc, argv );
+ }
+ else if ( strcmp ( "map", argv[0] ) == 0 )
+ {
+ call_align ( argc, argv );
+ }
+ //call_map2contig(argc,argv);
+ else if ( strcmp ( "scaff", argv[0] ) == 0 )
+ {
+ call_scaffold ( argc, argv );
+ }
+ else if ( strcmp ( "all", argv[0] ) == 0 )
+ {
+ pipeline ( argc, argv ); /* runs all four stages in turn */
+ }
+ else
+ {
+ display_usage (); /* unknown sub-command */
+ }
+
+ return 0; /* always 0: the return values of the call_* functions are not inspected */
}
static void display_usage ()
{
- fprintf ( stderr, "\nUsage: SOAPdenovo <command> [option]\n" );
- fprintf ( stderr, " pregraph construct kmer-graph\n" );
- fprintf ( stderr, " sparse_pregraph construct sparse kmer-graph\n");
- fprintf ( stderr, " contig eliminate errors and output contigs\n" );
- fprintf ( stderr, " map map reads to contigs\n" );
- fprintf ( stderr, " scaff construct scaffolds\n" );
- fprintf ( stderr, " all do pregraph-contig-map-scaff in turn\n" );
+ fprintf ( stderr, "\nUsage: SOAPdenovo <command> [option]\n" ); /* display_usage: writes the list of sub-commands to stderr; the names match the strings compared in main */
+ fprintf ( stderr, " pregraph construct kmer-graph\n" );
+ fprintf ( stderr, " sparse_pregraph construct sparse kmer-graph\n");
+ fprintf ( stderr, " contig eliminate errors and output contigs\n" );
+ fprintf ( stderr, " map map reads to contigs\n" );
+ fprintf ( stderr, " scaff construct scaffolds\n" );
+ fprintf ( stderr, " all do pregraph-contig-map-scaff in turn\n" );
}
-static void pipeline ( int argc, char ** argv )
+static void pipeline ( int argc, char **argv ) /* Handler for "all": parses the combined option set once, then calls call_pregraph, call_heavygraph, call_align and call_scaffold in that order, each with its own rebuilt argument vector. */
{
- char * options[32];
- unsigned char getK, getRfile, getOfile, getD, getDD, getL, getR, getP, getF, getf, getk, getu, getG, getc, getC, getb, getB, getN, getw, getV;
- unsigned char getm, getE; //getr,
- char readfile[256], outfile[256];
- char temp[128];
- char * name;
- int kmer = 0, cutoff_len = 0, ncpu = 0, lowK = 0, lowC = 0, kmer_small = 0, gap_diff = 0, genome_size = 0;
- float min_cvg = 0.0, max_cvg = 0.0, insert_size_bound = 0.0, bubble_coverage = 0.0;
- char kmer_s[16], len_s[16], ncpu_s[16], M_s[16], lowK_s[16], lowC_s[16], kmer_small_s[16], gap_diff_s[16], min_cvg_s[16], max_cvg_s[16], insert_size_bound_s[16], bubble_coverage_s[16], genome_size_s[16];
- int i, copt, index, M = 1;
- int maxk;
- char maxk_s[16];
- char arcfilter_s[16];
- extern char * optarg;
- time_t start_t, stop_t;
- time ( &start_t );
- getK = getRfile = getOfile = getD = getDD = getL = getR = getP = getF = getf = getk = getu = getG = getc = getC = getb = getB = getN = getw = getm = getE = getV = 0;
-
- while ( ( copt = getopt ( argc, argv, "a:s:o:K:M:L:p:G:d:D:RuFk:fc:C:b:B:N:wm:e:EV" ) ) != EOF ) //r
- {
- switch ( copt )
- {
- case 's':
- getRfile = 1;
- sscanf ( optarg, "%s", readfile );
- break;
- case 'o':
- getOfile = 1;
- sscanf ( optarg, "%s", outfile );
- break;
- case 'K':
- getK = 1;
- sscanf ( optarg, "%s", temp );
- kmer = atoi ( temp );
- break;
- case 'G':
- getG = 1;
- sscanf ( optarg, "%s", temp );
- gap_diff = atoi ( temp );
- break;
- case 'M':
- sscanf ( optarg, "%s", temp );
- M = atoi ( temp );
- break;
- case 'p':
- getP = 1;
- sscanf ( optarg, "%s", temp );
- ncpu = atoi ( temp );
- break;
- case 'L':
- getL = 1;
- sscanf ( optarg, "%s", temp );
- cutoff_len = atoi ( temp );
- break;
- case 'R':
- getR = 1;
- break;
- case 'u':
- getu = 1;
- maskRep = 0;
- break;
- case 'd':
- getD = 1;
- sscanf ( optarg, "%s", temp );
- lowK = atoi ( temp );
- break;
- case 'D':
- getDD = 1;
- sscanf ( optarg, "%s", temp );
- lowC = atoi ( temp );
- break;
- case 'a':
- initKmerSetSize = atoi ( optarg );
- break;
- case 'F':
- getF = 1;
- break;
- case 'k':
- getk = 1;
- sscanf ( optarg, "%s", temp );
- kmer_small = atoi ( temp );
- break;
- case 'f':
- getf = 1;
- break;
- case 'c':
- getc = 1;
- sscanf ( optarg, "%s", temp );
- min_cvg = atof ( temp );
- break;
- case 'C':
- getC = 1;
- sscanf ( optarg, "%s", temp );
- max_cvg = atof ( temp );
- break;
- case 'b':
- getb = 1;
- sscanf ( optarg, "%s", temp );
- insert_size_bound = atof ( temp );
- break;
- case 'B':
- getB = 1;
- sscanf ( optarg, "%s", temp );
- bubble_coverage = atof ( temp );
- break;
- case 'N':
- getN = 1;
- sscanf ( optarg, "%s", temp );
- genome_size = atoi ( temp );
- break;
- case 'w':
- getw = 1;
- break;
- case 'm':
- getm = 1;
- sscanf ( optarg, "%s", temp );
- maxk = atoi ( temp );
- break;
- /*
- case 'r':
- getr = 1;
- break;
- */
- case 'e':
- sscanf ( optarg, "%s", temp );
- arcfilter = atoi ( temp );
- break;
- case 'E':
- getE = 1;
- break;
- case 'V':
- getV = 1;
- break;
- default:
-
- if ( getRfile == 0 || getOfile == 0 )
- {
- display_all_usage ();
- exit ( -1 );
- }
- }
- }
-
- if ( getRfile == 0 || getOfile == 0 )
- {
- display_all_usage ();
- exit ( -1 );
- }
-
- if ( thrd_num < 1 )
- {
- thrd_num = 1;
- }
-
- // getK = getRfile = getOfile = getD = getL = getR = 0;
- name = "pregraph";
- index = 0;
- options[index++] = name;
- options[index++] = "-s";
- options[index++] = readfile;
-
- if ( getK )
- {
- options[index++] = "-K";
- sprintf ( kmer_s, "%d", kmer );
- options[index++] = kmer_s;
- }
-
- if ( getP )
- {
- options[index++] = "-p";
- sprintf ( ncpu_s, "%d", ncpu );
- options[index++] = ncpu_s;
- }
-
- if ( getD )
- {
- options[index++] = "-d";
- sprintf ( lowK_s, "%d", lowK );
- options[index++] = lowK_s;
- }
-
- if ( getR )
- {
- options[index++] = "-R";
- }
-
- options[index++] = "-o";
- options[index++] = outfile;
- /*
- for (i = 0; i < index; i++)
- {
- fprintf (stderr,"%s ", options[i]);
- }
-
- fprintf (stderr,"\n");
- */
- call_pregraph ( index, options );
- name = "contig";
- index = 0;
- options[index++] = name;
- options[index++] = "-g";
- options[index++] = outfile;
- options[index++] = "-M";
- sprintf ( M_s, "%d", M );
- options[index++] = M_s;
-
- if ( getR )
- {
- options[index++] = "-R";
- }
-
- if ( getDD )
- {
- options[index++] = "-D";
- sprintf ( lowC_s, "%d", lowC );
- options[index++] = lowC_s;
- }
-
- if ( getRfile )
- {
- options[index++] = "-s";
- options[index++] = readfile;
- }
-
- if ( getP )
- {
- options[index++] = "-p";
- sprintf ( ncpu_s, "%d", ncpu );
- options[index++] = ncpu_s;
- }
-
- if ( getm )
- {
- options[index++] = "-m";
- sprintf ( maxk_s, "%d", maxk );
- options[index++] = maxk_s;
- }
-
- /*
- if(getr){
- options[index++] = "-r";
- }
- */
- if ( getE )
- {
- options[index++] = "-E";
- }
-
- if ( arcfilter )
- {
- options[index++] = "-e";
- sprintf ( arcfilter_s, "%d", arcfilter );
- options[index++] = arcfilter_s;
- }
-
- /*
- for (i = 0; i < index; i++)
- {
- fprintf (stderr,"%s ", options[i]);
- }
-
- fprintf (stderr,"\n");
- */
- call_heavygraph ( index, options );
- name = "map";
- index = 0;
- options[index++] = name;
- options[index++] = "-s";
- options[index++] = readfile;
- options[index++] = "-g";
- options[index++] = outfile;
-
- if ( getP )
- {
- options[index++] = "-p";
- sprintf ( ncpu_s, "%d", ncpu );
- options[index++] = ncpu_s;
- }
-
- if ( getK )
- {
- options[index++] = "-K";
- sprintf ( kmer_s, "%d", kmer );
- options[index++] = kmer_s;
- }
-
- if ( getk )
- {
- options[index++] = "-k";
- sprintf ( kmer_small_s, "%d", kmer_small );
- options[index++] = kmer_small_s;
- }
-
- if ( getf )
- {
- options[index++] = "-f";
- }
-
- /*
- for (i = 0; i < index; i++)
- {
- fprintf (stderr,"%s ", options[i]);
- }
-
- fprintf (stderr,"\n");
- */
- call_align ( index, options );
- name = "scaff";
- index = 0;
- options[index++] = name;
- options[index++] = "-g";
- options[index++] = outfile;
-
- if ( getF )
- {
- options[index++] = "-F";
- }
-
- if ( getP )
- {
- options[index++] = "-p";
- sprintf ( ncpu_s, "%d", ncpu );
- options[index++] = ncpu_s;
- }
-
- if ( getL )
- {
- options[index++] = "-L";
- sprintf ( len_s, "%d", cutoff_len );
- options[index++] = len_s;
- }
-
- if ( getG )
- {
- options[index++] = "-G";
- sprintf ( gap_diff_s, "%d", gap_diff );
- options[index++] = gap_diff_s;
- }
-
- if ( getu )
- {
- options[index++] = "-u";
- }
-
- if ( getc )
- {
- options[index++] = "-c";
- sprintf ( min_cvg_s, "%f", min_cvg );
- options[index++] = min_cvg_s;
- }
-
- if ( getC )
- {
- options[index++] = "-C";
- sprintf ( max_cvg_s, "%f", max_cvg );
- options[index++] = max_cvg_s;
- }
-
- if ( getb )
- {
- options[index++] = "-b";
- sprintf ( insert_size_bound_s, "%f", insert_size_bound );
- options[index++] = insert_size_bound_s;
- }
-
- if ( getB )
- {
- options[index++] = "-B";
- sprintf ( bubble_coverage_s, "%f", bubble_coverage );
- options[index++] = bubble_coverage_s;
- }
-
- if ( getN )
- {
- options[index++] = "-N";
- sprintf ( genome_size_s, "%d", genome_size );
- options[index++] = genome_size_s;
- }
-
- if ( getw )
- {
- options[index++] = "-w";
- }
-
- if ( getV )
- {
- options[index++] = "-V";
- }
-
- /*
- for (i = 0; i < index; i++)
- {
- fprintf (stderr,"%s ", options[i]);
- }
-
- fprintf (stderr,"\n");
- */
- call_scaffold ( index, options );
- time ( &stop_t );
- fprintf ( stderr, "Time for the whole pipeline: %dm.\n", ( int ) ( stop_t - start_t ) / 60 );
+ char *options[32]; /* argument vector, refilled from index 0 before each stage call */
+ unsigned char getK, getRfile, getOfile, getD, getDD, getL, getR, getP, getF, getf, getk, getu, getG, getc, getC, getb, getB, getN, getw, getV;
+ unsigned char getm, getE; //getr,
+ char readfile[256], outfile[256];
+ char temp[128];
+ char *name;
+ int kmer = 0, cutoff_len = 0, ncpu = 0, lowK = 0, lowC = 0, kmer_small = 0, gap_diff = 0, genome_size = 0;
+ float min_cvg = 0.0, max_cvg = 0.0, insert_size_bound = 0.0, bubble_coverage = 0.0;
+ char kmer_s[16], len_s[16], ncpu_s[16], M_s[16], lowK_s[16], lowC_s[16], kmer_small_s[16], gap_diff_s[16], min_cvg_s[16], max_cvg_s[16], insert_size_bound_s[16], bubble_coverage_s[16], genome_size_s[16];
+ int i, copt, index, M = 1; /* i is referenced only by the commented-out debug loops; M keeps 1 unless -M is given */
+ int maxk; /* assigned only by -m and read only when getm is set */
+ char maxk_s[16];
+ char arcfilter_s[16];
+ extern char *optarg;
+ time_t start_t, stop_t;
+ time ( &start_t );
+ getK = getRfile = getOfile = getD = getDD = getL = getR = getP = getF = getf = getk = getu = getG = getc = getC = getb = getB = getN = getw = getm = getE = getV = 0;
+
+ while ( ( copt = getopt ( argc, argv, "a:s:o:K:M:L:p:G:d:D:RuFk:fc:C:b:B:N:wm:e:EV" ) ) != EOF ) //r
+ {
+ switch ( copt )
+ {
+ case 's':
+ getRfile = 1;
+ sscanf ( optarg, "%s", readfile ); /* NOTE(review): unbounded %s into the 256-byte readfile buffer */
+ break;
+
+ case 'o':
+ getOfile = 1;
+ sscanf ( optarg, "%s", outfile ); /* NOTE(review): unbounded %s into the 256-byte outfile buffer */
+ break;
+
+ case 'K':
+ getK = 1;
+ sscanf ( optarg, "%s", temp );
+ kmer = atoi ( temp );
+ break;
+
+ case 'G':
+ getG = 1;
+ sscanf ( optarg, "%s", temp );
+ gap_diff = atoi ( temp );
+ break;
+
+ case 'M': /* no get-flag for -M: the contig stage below always receives -M */
+ sscanf ( optarg, "%s", temp );
+ M = atoi ( temp );
+ break;
+
+ case 'p':
+ getP = 1;
+ sscanf ( optarg, "%s", temp );
+ ncpu = atoi ( temp );
+ break;
+
+ case 'L':
+ getL = 1;
+ sscanf ( optarg, "%s", temp );
+ cutoff_len = atoi ( temp );
+ break;
+
+ case 'R':
+ getR = 1;
+ break;
+
+ case 'u':
+ getu = 1;
+ maskRep = 0; /* clears maskRep here in addition to forwarding -u to the scaff stage */
+ break;
+
+ case 'd':
+ getD = 1;
+ sscanf ( optarg, "%s", temp );
+ lowK = atoi ( temp );
+ break;
+
+ case 'D':
+ getDD = 1;
+ sscanf ( optarg, "%s", temp );
+ lowC = atoi ( temp );
+ break;
+
+ case 'a':
+ initKmerSetSize = atoi ( optarg ); /* stored in initKmerSetSize directly; no -a is added to any stage's options below */
+ break;
+
+ case 'F':
+ getF = 1;
+ break;
+
+ case 'k':
+ getk = 1;
+ sscanf ( optarg, "%s", temp );
+ kmer_small = atoi ( temp );
+ break;
+
+ case 'f':
+ getf = 1;
+ break;
+
+ case 'c':
+ getc = 1;
+ sscanf ( optarg, "%s", temp );
+ min_cvg = atof ( temp );
+ break;
+
+ case 'C':
+ getC = 1;
+ sscanf ( optarg, "%s", temp );
+ max_cvg = atof ( temp );
+ break;
+
+ case 'b':
+ getb = 1;
+ sscanf ( optarg, "%s", temp );
+ insert_size_bound = atof ( temp );
+ break;
+
+ case 'B':
+ getB = 1;
+ sscanf ( optarg, "%s", temp );
+ bubble_coverage = atof ( temp );
+ break;
+
+ case 'N':
+ getN = 1;
+ sscanf ( optarg, "%s", temp );
+ genome_size = atoi ( temp );
+ break;
+
+ case 'w':
+ getw = 1;
+ break;
+
+ case 'm':
+ getm = 1;
+ sscanf ( optarg, "%s", temp );
+ maxk = atoi ( temp );
+ break;
+
+ /*
+ case 'r':
+ getr = 1;
+ break;
+ */
+ case 'e':
+ sscanf ( optarg, "%s", temp );
+ arcfilter = atoi ( temp ); /* stored in arcfilter directly; forwarded to contig as -e when non-zero */
+ break;
+
+ case 'E':
+ getE = 1;
+ break;
+
+ case 'V':
+ getV = 1;
+ break;
+
+ default: /* any other getopt result: exits only if -s or -o has not been seen so far, otherwise parsing continues */
+
+ if ( getRfile == 0 || getOfile == 0 )
+ {
+ display_all_usage ();
+ exit ( -1 );
+ }
+ }
+ }
+
+ if ( getRfile == 0 || getOfile == 0 ) /* both -s and -o are mandatory */
+ {
+ display_all_usage ();
+ exit ( -1 );
+ }
+
+ if ( thrd_num < 1 ) /* NOTE(review): thrd_num is not assigned anywhere in this function; -p is kept in ncpu and forwarded per stage */
+ {
+ thrd_num = 1;
+ }
+
+ // getK = getRfile = getOfile = getD = getL = getR = 0;
+ name = "pregraph"; /* stage 1: pregraph receives -s, optional -K -p -d -R, then -o */
+ index = 0;
+ options[index++] = name;
+ options[index++] = "-s";
+ options[index++] = readfile;
+
+ if ( getK )
+ {
+ options[index++] = "-K";
+ sprintf ( kmer_s, "%d", kmer );
+ options[index++] = kmer_s;
+ }
+
+ if ( getP )
+ {
+ options[index++] = "-p";
+ sprintf ( ncpu_s, "%d", ncpu );
+ options[index++] = ncpu_s;
+ }
+
+ if ( getD )
+ {
+ options[index++] = "-d";
+ sprintf ( lowK_s, "%d", lowK );
+ options[index++] = lowK_s;
+ }
+
+ if ( getR )
+ {
+ options[index++] = "-R";
+ }
+
+ options[index++] = "-o";
+ options[index++] = outfile;
+ /*
+ for (i = 0; i < index; i++)
+ {
+ fprintf (stderr,"%s ", options[i]);
+ }
+
+ fprintf (stderr,"\n");
+ */
+ call_pregraph ( index, options );
+ name = "contig"; /* stage 2: contig receives -g, -M, optional -R -D -s -p -m -E -e */
+ index = 0;
+ options[index++] = name;
+ options[index++] = "-g";
+ options[index++] = outfile;
+ options[index++] = "-M";
+ sprintf ( M_s, "%d", M );
+ options[index++] = M_s;
+
+ if ( getR )
+ {
+ options[index++] = "-R";
+ }
+
+ if ( getDD )
+ {
+ options[index++] = "-D";
+ sprintf ( lowC_s, "%d", lowC );
+ options[index++] = lowC_s;
+ }
+
+ if ( getRfile ) /* always true at this point: the function exits above when -s is missing */
+ {
+ options[index++] = "-s";
+ options[index++] = readfile;
+ }
+
+ if ( getP )
+ {
+ options[index++] = "-p";
+ sprintf ( ncpu_s, "%d", ncpu );
+ options[index++] = ncpu_s;
+ }
+
+ if ( getm )
+ {
+ options[index++] = "-m";
+ sprintf ( maxk_s, "%d", maxk );
+ options[index++] = maxk_s;
+ }
+
+ /*
+ if(getr){
+ options[index++] = "-r";
+ }
+ */
+ if ( getE )
+ {
+ options[index++] = "-E";
+ }
+
+ if ( arcfilter )
+ {
+ options[index++] = "-e";
+ sprintf ( arcfilter_s, "%d", arcfilter );
+ options[index++] = arcfilter_s;
+ }
+
+ /*
+ for (i = 0; i < index; i++)
+ {
+ fprintf (stderr,"%s ", options[i]);
+ }
+
+ fprintf (stderr,"\n");
+ */
+ call_heavygraph ( index, options );
+ name = "map"; /* stage 3: map receives -s -g, optional -p -K -k -f */
+ index = 0;
+ options[index++] = name;
+ options[index++] = "-s";
+ options[index++] = readfile;
+ options[index++] = "-g";
+ options[index++] = outfile;
+
+ if ( getP )
+ {
+ options[index++] = "-p";
+ sprintf ( ncpu_s, "%d", ncpu );
+ options[index++] = ncpu_s;
+ }
+
+ if ( getK )
+ {
+ options[index++] = "-K";
+ sprintf ( kmer_s, "%d", kmer );
+ options[index++] = kmer_s;
+ }
+
+ if ( getk )
+ {
+ options[index++] = "-k";
+ sprintf ( kmer_small_s, "%d", kmer_small );
+ options[index++] = kmer_small_s;
+ }
+
+ if ( getf )
+ {
+ options[index++] = "-f";
+ }
+
+ /*
+ for (i = 0; i < index; i++)
+ {
+ fprintf (stderr,"%s ", options[i]);
+ }
+
+ fprintf (stderr,"\n");
+ */
+ call_align ( index, options );
+ name = "scaff"; /* stage 4: scaff receives -g, optional -F -p -L -G -u -c -C -b -B -N -w -V */
+ index = 0;
+ options[index++] = name;
+ options[index++] = "-g";
+ options[index++] = outfile;
+
+ if ( getF )
+ {
+ options[index++] = "-F";
+ }
+
+ if ( getP )
+ {
+ options[index++] = "-p";
+ sprintf ( ncpu_s, "%d", ncpu );
+ options[index++] = ncpu_s;
+ }
+
+ if ( getL )
+ {
+ options[index++] = "-L";
+ sprintf ( len_s, "%d", cutoff_len );
+ options[index++] = len_s;
+ }
+
+ if ( getG )
+ {
+ options[index++] = "-G";
+ sprintf ( gap_diff_s, "%d", gap_diff );
+ options[index++] = gap_diff_s;
+ }
+
+ if ( getu )
+ {
+ options[index++] = "-u";
+ }
+
+ if ( getc )
+ {
+ options[index++] = "-c";
+ sprintf ( min_cvg_s, "%f", min_cvg ); /* NOTE(review): "%f" prints six decimals into a 16-byte buffer; the same pattern is used for -C, -b and -B below — confirm large values cannot occur */
+ options[index++] = min_cvg_s;
+ }
+
+ if ( getC )
+ {
+ options[index++] = "-C";
+ sprintf ( max_cvg_s, "%f", max_cvg );
+ options[index++] = max_cvg_s;
+ }
+
+ if ( getb )
+ {
+ options[index++] = "-b";
+ sprintf ( insert_size_bound_s, "%f", insert_size_bound );
+ options[index++] = insert_size_bound_s;
+ }
+
+ if ( getB )
+ {
+ options[index++] = "-B";
+ sprintf ( bubble_coverage_s, "%f", bubble_coverage );
+ options[index++] = bubble_coverage_s;
+ }
+
+ if ( getN )
+ {
+ options[index++] = "-N";
+ sprintf ( genome_size_s, "%d", genome_size );
+ options[index++] = genome_size_s;
+ }
+
+ if ( getw )
+ {
+ options[index++] = "-w";
+ }
+
+ if ( getV )
+ {
+ options[index++] = "-V";
+ }
+
+ /*
+ for (i = 0; i < index; i++)
+ {
+ fprintf (stderr,"%s ", options[i]);
+ }
+
+ fprintf (stderr,"\n");
+ */
+ call_scaffold ( index, options );
+ time ( &stop_t );
+ fprintf ( stderr, "Time for the whole pipeline: %dm.\n", ( int ) ( stop_t - start_t ) / 60 ); /* elapsed time in whole minutes; the int cast is applied before the division */
}
static void display_all_usage ()
{
- // fprintf (stderr,"\nSOAPdenovo all -s configFile -o outputGraph [-R -f -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize]\n");
- fprintf ( stderr, "\nSOAPdenovo all -s configFile -o outputGraph [-R -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize]\n" );
- fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
- fprintf ( stderr, " -o <string> outputGraph: prefix of output graph file name\n" );
+ // fprintf (stderr,"\nSOAPdenovo all -s configFile -o outputGraph [-R -f -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize]\n");
+ fprintf ( stderr, "\nSOAPdenovo all -s configFile -o outputGraph [-R -F -u -w] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutOff -D EdgeCovCutoff -M mergeLevel -k kmer_R2C, -G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize]\n" ); /* display_all_usage: writes the help text for the "all" sub-command to stderr; this synopsis omits -e, -m, -E and -V, which are described further down */
+ fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
+ fprintf ( stderr, " -o <string> outputGraph: prefix of output graph file name\n" );
#ifdef MER127
- fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" ); /* MER127 builds advertise a maximum of 127, other builds 63 */
#else
- fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
#endif
- fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
- fprintf ( stderr, " -a <int> initMemoryAssumption: memory assumption initialized to avoid further reallocation, unit G, [0]\n" );
- fprintf ( stderr, " -d <int> kmerFreqCutoff: kmers with frequency no larger than KmerFreqCutoff will be deleted, [0]\n" );
- fprintf ( stderr, " -R (optional) resolve repeats by reads, [NO]\n" );
- fprintf ( stderr, " -D <int> edgeCovCutoff: edges with coverage no larger than EdgeCovCutoff will be deleted, [1]\n" );
- fprintf ( stderr, " -M <int> mergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n" );
- fprintf ( stderr, " -e <int> arcWeight: two edges, between which the arc's weight is larger than arcWeight, will be linerized, [0]\n" );
+ fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
+ fprintf ( stderr, " -a <int> initMemoryAssumption: memory assumption initialized to avoid further reallocation, unit G, [0]\n" );
+ fprintf ( stderr, " -d <int> kmerFreqCutoff: kmers with frequency no larger than KmerFreqCutoff will be deleted, [0]\n" );
+ fprintf ( stderr, " -R (optional) resolve repeats by reads, [NO]\n" );
+ fprintf ( stderr, " -D <int> edgeCovCutoff: edges with coverage no larger than EdgeCovCutoff will be deleted, [1]\n" );
+ fprintf ( stderr, " -M <int> mergeLevel(min 0, max 3): the strength of merging similar sequences during contiging, [1]\n" );
+ fprintf ( stderr, " -e <int> arcWeight: two edges, between which the arc's weight is larger than arcWeight, will be linerized, [0]\n" );
#ifdef MER127
- fprintf ( stderr, " -m <int> maxKmer (max 127): maximum kmer size used for multi-kmer, [NO]\n" );
+ fprintf ( stderr, " -m <int> maxKmer (max 127): maximum kmer size used for multi-kmer, [NO]\n" );
#else
- fprintf ( stderr, " -m <int> maxKmer (max 63): maximum kmer size used for multi-kmer, [NO]\n" );
+ fprintf ( stderr, " -m <int> maxKmer (max 63): maximum kmer size used for multi-kmer, [NO]\n" );
#endif
- fprintf ( stderr, " -E (optional) merge clean bubble before iterate, works only if -M is set when using multi-kmer, [NO]\n" );
- // printf (" -O (optional)\toutput contig of each kmer when iterating\n");
- // fprintf (stderr," -f (optional) output gap related reads in map step for using SRkgf to fill gaps, [NO]\n");
+ fprintf ( stderr, " -E (optional) merge clean bubble before iterate, works only if -M is set when using multi-kmer, [NO]\n" );
+ // printf (" -O (optional)\toutput contig of each kmer when iterating\n");
+ // fprintf (stderr," -f (optional) output gap related reads in map step for using SRkgf to fill gaps, [NO]\n");
#ifdef MER127
- fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 127): kmer size used for mapping reads to contigs, [K]\n" );
+ fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 127): kmer size used for mapping reads to contigs, [K]\n" );
#else
- fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 63): kmer size used for mapping reads to contigs, [K]\n" );
+ fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 63): kmer size used for mapping reads to contigs, [K]\n" );
#endif
- fprintf ( stderr, " -F (optional) fill gaps in scaffolds, [NO]\n" );
- fprintf ( stderr, " -u (optional) un-mask contigs with high/low coverage before scaffolding, [mask]\n" );
- fprintf ( stderr, " -w (optional) keep contigs weakly connected to other contigs in scaffold, [NO]\n" );
- fprintf ( stderr, " -G <int> gapLenDiff: allowed length difference between estimated and filled gap, [50]\n" );
- fprintf ( stderr, " -L <int> minContigLen: shortest contig for scaffolding, [K+2]\n" );
- fprintf ( stderr, " -c <float> minContigCvg: minimum contig coverage (c*avgCvg), contigs shorter than 100bp with coverage smaller than c*avgCvg will be masked before scaffolding unless -u is set, [0.1]\n" );
- fprintf ( stderr, " -C <float> maxContigCvg: maximum contig coverage (C*avgCvg), contigs with coverage larger than C*avgCvg or contigs shorter than 100bp with coverage larger than 0.8*C*avgCvg will be masked before scaffolding unless -u is set, [2]\n" );
- fprintf ( stderr, " -b <float> insertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n" );
- fprintf ( stderr, " -B <float> bubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n" );
- fprintf ( stderr, " -N <int> genomeSize: genome size for statistics, [0]\n" );
- fprintf ( stderr, " -V (optional) output information for Hawkeye to visualize the assembly, [NO]\n" );
+ fprintf ( stderr, " -F (optional) fill gaps in scaffolds, [NO]\n" );
+ fprintf ( stderr, " -u (optional) un-mask contigs with high/low coverage before scaffolding, [mask]\n" );
+ fprintf ( stderr, " -w (optional) keep contigs weakly connected to other contigs in scaffold, [NO]\n" );
+ fprintf ( stderr, " -G <int> gapLenDiff: allowed length difference between estimated and filled gap, [50]\n" );
+ fprintf ( stderr, " -L <int> minContigLen: shortest contig for scaffolding, [K+2]\n" );
+ fprintf ( stderr, " -c <float> minContigCvg: minimum contig coverage (c*avgCvg), contigs shorter than 100bp with coverage smaller than c*avgCvg will be masked before scaffolding unless -u is set, [0.1]\n" );
+ fprintf ( stderr, " -C <float> maxContigCvg: maximum contig coverage (C*avgCvg), contigs with coverage larger than C*avgCvg or contigs shorter than 100bp with coverage larger than 0.8*C*avgCvg will be masked before scaffolding unless -u is set, [2]\n" );
+ fprintf ( stderr, " -b <float> insertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n" );
+ fprintf ( stderr, " -B <float> bubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n" );
+ fprintf ( stderr, " -N <int> genomeSize: genome size for statistics, [0]\n" );
+ fprintf ( stderr, " -V (optional) output information for Hawkeye to visualize the assembly, [NO]\n" );
}
diff --git a/standardPregraph/map.c b/standardPregraph/map.c
index c762ec3..6853ce9 100644
--- a/standardPregraph/map.c
+++ b/standardPregraph/map.c
@@ -1,7 +1,7 @@
/*
* map.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,7 +26,7 @@
#include "extfunc.h"
#include "extvab.h"
-static void initenv ( int argc, char ** argv );
+static void initenv ( int argc, char **argv );
static char shortrdsfile[256];
static char graphfile[256];
@@ -45,34 +45,34 @@ Output:
Return:
The kmer size.
*************************************************/
-static int getMinOverlap ( char * gfile )
+static int getMinOverlap ( char *gfile ) /* Scans "<gfile>.preGraphBasic"; returns the third value parsed from a line starting with 'V', or 23 when the file cannot be opened or no line updates it. */
{
- char name[256], ch;
- FILE * fp;
- int num_kmer, overlaplen = 23;
- char line[1024];
- sprintf ( name, "%s.preGraphBasic", gfile );
- fp = fopen ( name, "r" );
-
- if ( !fp )
- {
- return overlaplen;
- }
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == 'V' )
- {
- sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen );
- }
- else if ( line[0] == 'M' )
- {
- sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen );
- }
- }
-
- fclose ( fp );
- return overlaplen;
+ char name[256], ch;
+ FILE *fp;
+ int num_kmer, overlaplen = 23; /* 23 is the fallback return value */
+ char line[1024];
+ sprintf ( name, "%s.preGraphBasic", gfile ); /* NOTE(review): unbounded sprintf into the 256-byte name buffer */
+ fp = fopen ( name, "r" );
+
+ if ( !fp )
+ {
+ return overlaplen; /* file missing or unreadable: fall back to the default */
+ }
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == 'V' )
+ {
+ sscanf ( line + 6, "%d %c %d", &num_kmer, &ch, &overlaplen ); /* skips the first six characters, then reads int, char, int; only the last int is returned */
+ }
+ else if ( line[0] == 'M' )
+ {
+ sscanf ( line, "MaxReadLen %d MinReadLen %d MaxNameLen %d", &maxReadLen, &minReadLen, &maxNameLen ); /* side effect: writes maxReadLen, minReadLen and maxNameLen, which are not declared in this function */
+ }
+ }
+
+ fclose ( fp );
+ return overlaplen;
}
/*************************************************
@@ -93,53 +93,53 @@ Return:
0 if exits normally.
*************************************************/
-int call_align ( int argc, char ** argv )
+int call_align ( int argc, char **argv ) /* Entry point of the map stage: parses options via initenv, picks the kmer size, then runs prlContig2nodes, prlLongRead2Ctg and prlRead2Ctg, logging the time of each step to stderr. */
{
- time_t start_t, stop_t, time_bef, time_aft;
- time ( &start_t );
- fprintf ( stderr, "\n********************\n" );
- fprintf ( stderr, "Map\n" );
- fprintf ( stderr, "********************\n\n" );
- initenv ( argc, argv );
- overlaplen = getMinOverlap ( graphfile );
+ time_t start_t, stop_t, time_bef, time_aft;
+ time ( &start_t );
+ fprintf ( stderr, "\n********************\n" );
+ fprintf ( stderr, "Map\n" );
+ fprintf ( stderr, "********************\n\n" );
+ initenv ( argc, argv );
+ overlaplen = getMinOverlap ( graphfile ); /* overwrites any overlaplen that initenv stored from -K */
#ifdef MER127
- if ( smallKmer > 12 && smallKmer < 128 && smallKmer % 2 == 1 )
- {
- deltaKmer = overlaplen - smallKmer;
- overlaplen = smallKmer;
- }
+ if ( smallKmer > 12 && smallKmer < 128 && smallKmer % 2 == 1 ) /* accept smallKmer only when it is odd and in 13..127 */
+ {
+ deltaKmer = overlaplen - smallKmer;
+ overlaplen = smallKmer;
+ }
#else
- if ( smallKmer > 12 && smallKmer < 64 && smallKmer % 2 == 1 )
- {
- deltaKmer = overlaplen - smallKmer;
- overlaplen = smallKmer;
- }
+ if ( smallKmer > 12 && smallKmer < 64 && smallKmer % 2 == 1 ) /* accept smallKmer only when it is odd and in 13..63 */
+ {
+ deltaKmer = overlaplen - smallKmer;
+ overlaplen = smallKmer;
+ }
#endif
- fprintf ( stderr, "Kmer size: %d.\n", overlaplen );
- time ( &time_bef );
- ctg_short = overlaplen + 2;
- fprintf ( stderr, "Contig length cutoff: %d.\n", ctg_short );
- prlContig2nodes ( graphfile, ctg_short );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on graph construction: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- //map long read (asm_flags=4) to edge one by one
- time ( &time_bef );
- prlLongRead2Ctg ( shortrdsfile, graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on aligning long reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- //map read to edge one by one
- time ( &time_bef );
- prlRead2Ctg ( shortrdsfile, graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on aligning reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- free_Sets ( KmerSets, thrd_num );
- time ( &stop_t );
- fprintf ( stderr, "Overall time spent on alignment: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
- return 0;
+ fprintf ( stderr, "Kmer size: %d.\n", overlaplen );
+ time ( &time_bef );
+ ctg_short = overlaplen + 2; /* contig length cutoff is the kmer size plus 2 */
+ fprintf ( stderr, "Contig length cutoff: %d.\n", ctg_short );
+ prlContig2nodes ( graphfile, ctg_short );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on graph construction: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ //map long read (asm_flags=4) to edge one by one
+ time ( &time_bef );
+ prlLongRead2Ctg ( shortrdsfile, graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on aligning long reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ //map read to edge one by one
+ time ( &time_bef );
+ prlRead2Ctg ( shortrdsfile, graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on aligning reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ free_Sets ( KmerSets, thrd_num );
+ time ( &stop_t );
+ fprintf ( stderr, "Overall time spent on alignment: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 ); /* whole minutes; the int cast is applied before the division */
+ return 0; /* always 0 */
}
@@ -149,79 +149,85 @@ int call_align ( int argc, char ** argv )
* Parse command line switches
*****************************************************************************/
-void initenv ( int argc, char ** argv )
+void initenv ( int argc, char **argv )
{
- int copt;
- int inpseq, outseq;
- extern char * optarg;
- char temp[100];
- optind = 1;
- inpseq = outseq = 0;
- fprintf ( stderr, "Parameters: map " );
-
- while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:f" ) ) != EOF )
- {
- //printf("get option\n");
- switch ( copt )
- {
- case 's':
- fprintf ( stderr, "-s %s ", optarg );
- inpseq = 1;
- sscanf ( optarg, "%s", shortrdsfile );
- break;
- case 'g':
- fprintf ( stderr, "-g %s ", optarg );
- outseq = 1;
- sscanf ( optarg, "%s", graphfile );
- break;
- case 'K':
- fprintf ( stderr, "-K %s ", optarg );
- sscanf ( optarg, "%s", temp );
- overlaplen = atoi ( temp );
- break;
- case 'p':
- fprintf ( stderr, "-p %s ", optarg );
- sscanf ( optarg, "%s", temp );
- thrd_num = atoi ( temp );
- break;
- case 'k':
- fprintf ( stderr, "-k %s ", optarg );
- sscanf ( optarg, "%s", temp );
- smallKmer = atoi ( temp );
- break;
- case 'f':
- fill = 1;
- fprintf ( stderr, "-f " );
- break;
- default:
-
- if ( inpseq == 0 || outseq == 0 )
- {
- display_map_usage ();
- exit ( 1 );
- }
- }
- }
-
- fprintf ( stderr, "\n\n" );
-
- if ( inpseq == 0 || outseq == 0 )
- {
- display_map_usage ();
- exit ( 1 );
- }
+ int copt;
+ int inpseq, outseq;
+ extern char *optarg;
+ char temp[100];
+ optind = 1;
+ inpseq = outseq = 0;
+ fprintf ( stderr, "Parameters: map " );
+
+ while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:f" ) ) != EOF )
+ {
+ //printf("get option\n");
+ switch ( copt )
+ {
+ case 's':
+ fprintf ( stderr, "-s %s ", optarg );
+ inpseq = 1;
+ sscanf ( optarg, "%s", shortrdsfile );
+ break;
+
+ case 'g':
+ fprintf ( stderr, "-g %s ", optarg );
+ outseq = 1;
+ sscanf ( optarg, "%s", graphfile );
+ break;
+
+ case 'K':
+ fprintf ( stderr, "-K %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ overlaplen = atoi ( temp );
+ break;
+
+ case 'p':
+ fprintf ( stderr, "-p %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ thrd_num = atoi ( temp );
+ break;
+
+ case 'k':
+ fprintf ( stderr, "-k %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ smallKmer = atoi ( temp );
+ break;
+
+ case 'f':
+ fill = 1;
+ fprintf ( stderr, "-f " );
+ break;
+
+ default:
+
+ if ( inpseq == 0 || outseq == 0 )
+ {
+ display_map_usage ();
+ exit ( 1 );
+ }
+ }
+ }
+
+ fprintf ( stderr, "\n\n" );
+
+ if ( inpseq == 0 || outseq == 0 )
+ {
+ display_map_usage ();
+ exit ( 1 );
+ }
}
static void display_map_usage ()
{
- fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C]\n" );
- fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
- fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
- fprintf ( stderr, " -f (optional) output gap related reads in map step for using SRkgf to fill gap, [NO]\n" );
- fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
+ fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C]\n" );
+ fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
+ fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
+ fprintf ( stderr, " -f (optional) output gap related reads in map step for using SRkgf to fill gap, [NO]\n" );
+ fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
#ifdef MER127
- fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 127): kmer size used for mapping read to contig, [K]\n" );
+ fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 127): kmer size used for mapping read to contig, [K]\n" );
#else
- fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 63): kmer size used for mapping read to contig, [K]\n" );
+ fprintf ( stderr, " -k <int> kmer_R2C(min 13, max 63): kmer size used for mapping read to contig, [K]\n" );
#endif
}
diff --git a/standardPregraph/mem_manager.c b/standardPregraph/mem_manager.c
index 231dadb..4e6d6a2 100644
--- a/standardPregraph/mem_manager.c
+++ b/standardPregraph/mem_manager.c
@@ -1,7 +1,7 @@
/*
* mem_manager.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,77 +26,77 @@
#include "extfunc.h"
#include "extvab.h"
-MEM_MANAGER * createMem_manager ( int num_items, size_t unit_size )
+MEM_MANAGER *createMem_manager ( int num_items, size_t unit_size )
{
- MEM_MANAGER * mem_Manager = ( MEM_MANAGER * ) ckalloc ( 1 * sizeof ( MEM_MANAGER ) );
- mem_Manager->block_list = NULL;
- mem_Manager->items_per_block = num_items;
- mem_Manager->item_size = unit_size;
- mem_Manager->recycle_list = NULL;
- mem_Manager->counter = 0;
- return mem_Manager;
+ MEM_MANAGER *mem_Manager = ( MEM_MANAGER * ) ckalloc ( 1 * sizeof ( MEM_MANAGER ) );
+ mem_Manager->block_list = NULL;
+ mem_Manager->items_per_block = num_items;
+ mem_Manager->item_size = unit_size;
+ mem_Manager->recycle_list = NULL;
+ mem_Manager->counter = 0;
+ return mem_Manager;
}
-void freeMem_manager ( MEM_MANAGER * mem_Manager )
+void freeMem_manager ( MEM_MANAGER *mem_Manager )
{
- BLOCK_START * ite_block, *temp_block;
+ BLOCK_START *ite_block, *temp_block;
- if ( !mem_Manager )
- {
- return;
- }
+ if ( !mem_Manager )
+ {
+ return;
+ }
- ite_block = mem_Manager->block_list;
+ ite_block = mem_Manager->block_list;
- while ( ite_block )
- {
- temp_block = ite_block;
- ite_block = ite_block->next;
- free ( ( void * ) temp_block );
- }
+ while ( ite_block )
+ {
+ temp_block = ite_block;
+ ite_block = ite_block->next;
+ free ( ( void * ) temp_block );
+ }
- free ( ( void * ) mem_Manager );
+ free ( ( void * ) mem_Manager );
}
-void * getItem ( MEM_MANAGER * mem_Manager )
+void *getItem ( MEM_MANAGER *mem_Manager )
{
- RECYCLE_MARK * mark; //this is the type of return value
- BLOCK_START * block;
+ RECYCLE_MARK *mark; //this is the type of return value
+ BLOCK_START *block;
- if ( !mem_Manager )
- {
- return NULL;
- }
+ if ( !mem_Manager )
+ {
+ return NULL;
+ }
- if ( mem_Manager->recycle_list )
- {
- mark = mem_Manager->recycle_list;
- mem_Manager->recycle_list = mark->next;
- return mark;
- }
+ if ( mem_Manager->recycle_list )
+ {
+ mark = mem_Manager->recycle_list;
+ mem_Manager->recycle_list = mark->next;
+ return mark;
+ }
- mem_Manager->counter++;
+ mem_Manager->counter++;
- if ( !mem_Manager->block_list || mem_Manager->index_in_block == mem_Manager->items_per_block )
- {
- //pthread_mutex_lock(&gmutex);
- block = ckalloc ( sizeof ( BLOCK_START ) + mem_Manager->items_per_block * mem_Manager->item_size );
- //mem_Manager->counter += sizeof(BLOCK_START)+mem_Manager->items_per_block*mem_Manager->item_size;
- //pthread_mutex_unlock(&gmutex);
- block->next = mem_Manager->block_list;
- mem_Manager->block_list = block;
- mem_Manager->index_in_block = 1;
- return ( RECYCLE_MARK * ) ( ( void * ) block + sizeof ( BLOCK_START ) );
- }
+ if ( !mem_Manager->block_list || mem_Manager->index_in_block == mem_Manager->items_per_block )
+ {
+ //pthread_mutex_lock(&gmutex);
+ block = ckalloc ( sizeof ( BLOCK_START ) + mem_Manager->items_per_block * mem_Manager->item_size );
+ //mem_Manager->counter += sizeof(BLOCK_START)+mem_Manager->items_per_block*mem_Manager->item_size;
+ //pthread_mutex_unlock(&gmutex);
+ block->next = mem_Manager->block_list;
+ mem_Manager->block_list = block;
+ mem_Manager->index_in_block = 1;
+ return ( RECYCLE_MARK * ) ( ( void * ) block + sizeof ( BLOCK_START ) );
+ }
- block = mem_Manager->block_list;
- return ( RECYCLE_MARK * ) ( ( void * ) block + sizeof ( BLOCK_START ) + mem_Manager->item_size * ( mem_Manager->index_in_block++ ) );
+ block = mem_Manager->block_list;
+ return ( RECYCLE_MARK * ) ( ( void * ) block + sizeof ( BLOCK_START ) + mem_Manager->item_size * ( mem_Manager->index_in_block++ ) );
}
-void returnItem ( MEM_MANAGER * mem_Manager, void * item )
+void returnItem ( MEM_MANAGER *mem_Manager, void *item )
{
- RECYCLE_MARK * mark;
- mark = item;
- mark->next = mem_Manager->recycle_list;
- mem_Manager->recycle_list = mark;
+ RECYCLE_MARK *mark;
+ mark = item;
+ mark->next = mem_Manager->recycle_list;
+ mem_Manager->recycle_list = mark;
}
diff --git a/standardPregraph/newhash.c b/standardPregraph/newhash.c
index 48cbdb8..a10ef4a 100644
--- a/standardPregraph/newhash.c
+++ b/standardPregraph/newhash.c
@@ -1,7 +1,7 @@
/*
* newhash.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -33,27 +33,27 @@
#ifdef MER127
static const kmer_t empty_kmer = { {0, 0, 0, 0}, 0, 0, 0, 0, 0, 1, 0, 0 };
-static inline ubyte8 modular ( KmerSet * set, Kmer seq )
+static inline ubyte8 modular ( KmerSet *set, Kmer seq )
{
- ubyte8 temp;
- temp = ( seq.high1 % set->size ) << 32 | ( seq.low1 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low1 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.high2 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.high2 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low2 >> 32 & 0xffffffff );
- temp = ( temp % set->size ) << 32 | ( seq.low2 & 0xffffffff );
- temp = ( ubyte8 ) ( temp % set->size );
- return temp;
+ ubyte8 temp;
+ temp = ( seq.high1 % set->size ) << 32 | ( seq.low1 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low1 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.high2 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.high2 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low2 >> 32 & 0xffffffff );
+ temp = ( temp % set->size ) << 32 | ( seq.low2 & 0xffffffff );
+ temp = ( ubyte8 ) ( temp % set->size );
+ return temp;
}
#else
static const kmer_t empty_kmer = { {0, 0}, 0, 0, 0, 0, 0, 1, 0, 0 };
-static inline ubyte8 modular ( KmerSet * set, Kmer seq )
+static inline ubyte8 modular ( KmerSet *set, Kmer seq )
{
- ubyte8 hc;
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
- return hc;
+ ubyte8 hc;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
+ return hc;
}
#endif
@@ -71,29 +71,29 @@ Output:
Return:
None.
*************************************************/
-static inline void update_kmer ( kmer_t * mer, ubyte left, ubyte right )
+static inline void update_kmer ( kmer_t *mer, ubyte left, ubyte right )
{
- ubyte4 cov;
-
- if ( left < 4 )
- {
- cov = get_kmer_left_cov ( *mer, left );
-
- if ( cov < MAX_KMER_COV )
- {
- set_kmer_left_cov ( *mer, left, cov + 1 );
- }
- }
-
- if ( right < 4 )
- {
- cov = get_kmer_right_cov ( *mer, right );
-
- if ( cov < MAX_KMER_COV )
- {
- set_kmer_right_cov ( *mer, right, cov + 1 );
- }
- }
+ ubyte4 cov;
+
+ if ( left < 4 )
+ {
+ cov = get_kmer_left_cov ( *mer, left );
+
+ if ( cov < MAX_KMER_COV )
+ {
+ set_kmer_left_cov ( *mer, left, cov + 1 );
+ }
+ }
+
+ if ( right < 4 )
+ {
+ cov = get_kmer_right_cov ( *mer, right );
+
+ if ( cov < MAX_KMER_COV )
+ {
+ set_kmer_right_cov ( *mer, right, cov + 1 );
+ }
+ }
}
/*************************************************
@@ -111,65 +111,65 @@ Output:
Return:
None.
*************************************************/
-static inline void set_new_kmer ( kmer_t * mer, Kmer seq, ubyte left, ubyte right )
+static inline void set_new_kmer ( kmer_t *mer, Kmer seq, ubyte left, ubyte right )
{
- *mer = empty_kmer;
- set_kmer_seq ( *mer, seq );
-
- if ( left < 4 )
- {
- set_kmer_left_cov ( *mer, left, 1 );
- }
-
- if ( right < 4 )
- {
- set_kmer_right_cov ( *mer, right, 1 );
- }
+ *mer = empty_kmer;
+ set_kmer_seq ( *mer, seq );
+
+ if ( left < 4 )
+ {
+ set_kmer_left_cov ( *mer, left, 1 );
+ }
+
+ if ( right < 4 )
+ {
+ set_kmer_right_cov ( *mer, right, 1 );
+ }
}
static inline int is_prime_kh ( ubyte8 num )
{
- ubyte8 i, max;
+ ubyte8 i, max;
- if ( num < 4 )
- {
- return 1;
- }
+ if ( num < 4 )
+ {
+ return 1;
+ }
- if ( num % 2 == 0 )
- {
- return 0;
- }
+ if ( num % 2 == 0 )
+ {
+ return 0;
+ }
- max = ( ubyte8 ) sqrt ( ( float ) num );
+ max = ( ubyte8 ) sqrt ( ( float ) num );
- for ( i = 3; i < max; i += 2 )
- {
- if ( num % i == 0 )
- {
- return 0;
- }
- }
+ for ( i = 3; i < max; i += 2 )
+ {
+ if ( num % i == 0 )
+ {
+ return 0;
+ }
+ }
- return 1;
+ return 1;
}
static inline ubyte8 find_next_prime_kh ( ubyte8 num )
{
- if ( num % 2 == 0 )
- {
- num++;
- }
-
- while ( 1 )
- {
- if ( is_prime_kh ( num ) )
- {
- return num;
- }
-
- num += 2;
- }
+ if ( num % 2 == 0 )
+ {
+ num++;
+ }
+
+ while ( 1 )
+ {
+ if ( is_prime_kh ( num ) )
+ {
+ return num;
+ }
+
+ num += 2;
+ }
}
/*************************************************
@@ -185,131 +185,131 @@ Output:
Return:
The new kmer hash.
*************************************************/
-PUBLIC_FUNC KmerSet * init_kmerset ( ubyte8 init_size, float load_factor )
+PUBLIC_FUNC KmerSet *init_kmerset ( ubyte8 init_size, float load_factor )
{
- KmerSet * set;
-
- if ( init_size < 3 )
- {
- init_size = 3;
- }
- else
- {
- init_size = find_next_prime_kh ( init_size );
- }
-
- set = ( KmerSet * ) malloc ( sizeof ( KmerSet ) );
- set->size = init_size;
- set->count = 0;
- set->max = set->size * load_factor;
-
- if ( load_factor <= 0 )
- {
- load_factor = 0.25f;
- }
- else if ( load_factor >= 1 )
- {
- load_factor = 0.75f;
- }
-
- set->load_factor = load_factor;
- set->iter_ptr = 0;
- set->array = calloc ( set->size, sizeof ( kmer_t ) );
- set->flags = malloc ( ( set->size + 15 ) / 16 * 4 );
- memset ( set->flags, 0x55, ( set->size + 15 ) / 16 * 4 );
- return set;
+ KmerSet *set;
+
+ if ( init_size < 3 )
+ {
+ init_size = 3;
+ }
+ else
+ {
+ init_size = find_next_prime_kh ( init_size );
+ }
+
+ set = ( KmerSet * ) malloc ( sizeof ( KmerSet ) );
+ set->size = init_size;
+ set->count = 0;
+ set->max = set->size * load_factor;
+
+ if ( load_factor <= 0 )
+ {
+ load_factor = 0.25f;
+ }
+ else if ( load_factor >= 1 )
+ {
+ load_factor = 0.75f;
+ }
+
+ set->load_factor = load_factor;
+ set->iter_ptr = 0;
+ set->array = calloc ( set->size, sizeof ( kmer_t ) );
+ set->flags = malloc ( ( set->size + 15 ) / 16 * 4 );
+ memset ( set->flags, 0x55, ( set->size + 15 ) / 16 * 4 );
+ return set;
}
-PROTECTED_FUNC static inline ubyte8 get_kmerset ( KmerSet * set, Kmer seq )
+PROTECTED_FUNC static inline ubyte8 get_kmerset ( KmerSet *set, Kmer seq )
{
- ubyte8 hc;
- // U256b temp;
- // temp = Kmer2int256(seq);
- // hc = temp.low % set->size;
- // hc = modular (set, seq);
- // /*
+ ubyte8 hc;
+ // U256b temp;
+ // temp = Kmer2int256(seq);
+ // hc = temp.low % set->size;
+ // hc = modular (set, seq);
+ // /*
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- // */
-
- while ( 1 )
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- return hc;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- {
- return hc;
- }
- }
-
- hc++;
-
- if ( hc == set->size )
- {
- hc = 0;
- }
- }
-
- return set->size;
+ // */
+
+ while ( 1 )
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ return hc;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ return hc;
+ }
+ }
+
+ hc++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ return set->size;
}
-PUBLIC_FUNC int search_kmerset ( KmerSet * set, Kmer seq, kmer_t ** rs )
+PUBLIC_FUNC int search_kmerset ( KmerSet *set, Kmer seq, kmer_t **rs )
{
- ubyte8 hc;
- // U256b temp;
- // temp = Kmer2int256(seq);
- // hc = temp.low % set->size;
- // hc = modular (set, seq);
- // /*
+ ubyte8 hc;
+ // U256b temp;
+ // temp = Kmer2int256(seq);
+ // hc = temp.low % set->size;
+ // hc = modular (set, seq);
+ // /*
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- // */
-
- while ( 1 )
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- return 0;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- {
- *rs = set->array + hc;
- return 1;
- }
- }
-
- hc++;
-
- if ( hc == set->size )
- {
- hc = 0;
- }
- }
-
- return 0;
+ // */
+
+ while ( 1 )
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ return 0;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ *rs = set->array + hc;
+ return 1;
+ }
+ }
+
+ hc++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ return 0;
}
-PUBLIC_FUNC static inline int exists_kmerset ( KmerSet * set, Kmer seq )
+PUBLIC_FUNC static inline int exists_kmerset ( KmerSet *set, Kmer seq )
{
- ubyte8 idx;
- idx = get_kmerset ( set, seq );
- return !is_kmer_entity_null ( set->flags, idx );
+ ubyte8 idx;
+ idx = get_kmerset ( set, seq );
+ return !is_kmer_entity_null ( set->flags, idx );
}
/*************************************************
@@ -325,121 +325,121 @@ Output:
Return:
None.
*************************************************/
-PROTECTED_FUNC static inline void encap_kmerset ( KmerSet * set, ubyte8 num )
+PROTECTED_FUNC static inline void encap_kmerset ( KmerSet *set, ubyte8 num )
{
- ubyte4 * flags, *f;
- ubyte8 i, n, size, hc;
- kmer_t key, tmp;
-
- if ( set->count + num <= set->max )
- {
- return;
- }
-
- n = set->size;
-
- if ( initKmerSetSize != 0 )
- {
- if ( set->load_factor < 0.88 )
- {
- set->load_factor = 0.88;
- set->max = set->size * set->load_factor;
- return;
- }
- else
- {
- fprintf ( stderr, "-- Static memory pool exploded, please define a larger value. --\n" );
- abort();
- }
- }
-
- do
- {
- if ( n < 0xFFFFFFFU )
- {
- n <<= 1;
- }
- else
- {
- n += 0xFFFFFFU;
- }
-
- n = find_next_prime_kh ( n );
- }
- while ( n * set->load_factor < set->count + num );
-
- set->array = realloc ( set->array, n * sizeof ( kmer_t ) );
- //printf("Allocate Mem %lld(%d*%lld*%d)bytes\n",thrd_num*n*sizeof(kmer_t),thrd_num,n,sizeof(kmer_t));
-
- if ( set->array == NULL )
- {
- fprintf ( stderr, "-- Out of memory --\n" );
- abort ();
- }
-
- flags = malloc ( ( n + 15 ) / 16 * 4 );
- memset ( flags, 0x55, ( n + 15 ) / 16 * 4 );
- size = set->size;
- set->size = n;
- set->max = n * set->load_factor;
- f = set->flags;
- set->flags = flags;
- flags = f;
- __uint128_t temp;
-
- // U256b temp;
- for ( i = 0; i < size; i++ )
- {
- if ( !exists_kmer_entity ( flags, i ) )
- {
- continue;
- }
-
- key = set->array[i];
- set_kmer_entity_del ( flags, i );
-
- while ( 1 )
- {
- // temp = Kmer2int256(get_kmer_seq(key));
- // hc = temp.low % set->size;
- // hc = modular (set, get_kmer_seq (key));
- // /*
+ ubyte4 *flags, *f;
+ ubyte8 i, n, size, hc;
+ kmer_t key, tmp;
+
+ if ( set->count + num <= set->max )
+ {
+ return;
+ }
+
+ n = set->size;
+
+ if ( initKmerSetSize != 0 )
+ {
+ if ( set->load_factor < 0.88 )
+ {
+ set->load_factor = 0.88;
+ set->max = set->size * set->load_factor;
+ return;
+ }
+ else
+ {
+ fprintf ( stderr, "-- Static memory pool exploded, please define a larger value. --\n" );
+ abort();
+ }
+ }
+
+ do
+ {
+ if ( n < 0xFFFFFFFU )
+ {
+ n <<= 1;
+ }
+ else
+ {
+ n += 0xFFFFFFU;
+ }
+
+ n = find_next_prime_kh ( n );
+ }
+ while ( n * set->load_factor < set->count + num );
+
+ set->array = realloc ( set->array, n * sizeof ( kmer_t ) );
+ //printf("Allocate Mem %lld(%d*%lld*%d)bytes\n",thrd_num*n*sizeof(kmer_t),thrd_num,n,sizeof(kmer_t));
+
+ if ( set->array == NULL )
+ {
+ fprintf ( stderr, "-- Out of memory --\n" );
+ abort ();
+ }
+
+ flags = malloc ( ( n + 15 ) / 16 * 4 );
+ memset ( flags, 0x55, ( n + 15 ) / 16 * 4 );
+ size = set->size;
+ set->size = n;
+ set->max = n * set->load_factor;
+ f = set->flags;
+ set->flags = flags;
+ flags = f;
+ __uint128_t temp;
+
+ // U256b temp;
+ for ( i = 0; i < size; i++ )
+ {
+ if ( !exists_kmer_entity ( flags, i ) )
+ {
+ continue;
+ }
+
+ key = set->array[i];
+ set_kmer_entity_del ( flags, i );
+
+ while ( 1 )
+ {
+ // temp = Kmer2int256(get_kmer_seq(key));
+ // hc = temp.low % set->size;
+ // hc = modular (set, get_kmer_seq (key));
+ // /*
#ifdef MER127
- hc = modular ( set, get_kmer_seq ( key ) );
+ hc = modular ( set, get_kmer_seq ( key ) );
#else
- temp = Kmer2int128 ( get_kmer_seq ( key ) );
- hc = temp % set->size;
+ temp = Kmer2int128 ( get_kmer_seq ( key ) );
+ hc = temp % set->size;
#endif
- // */
-
- while ( !is_kmer_entity_null ( set->flags, hc ) )
- {
- hc++;
-
- if ( hc == set->size )
- {
- hc = 0;
- }
- }
-
- clear_kmer_entity_null ( set->flags, hc );
-
- if ( hc < size && exists_kmer_entity ( flags, hc ) )
- {
- tmp = key;
- key = set->array[hc];
- set->array[hc] = tmp;
- set_kmer_entity_del ( flags, hc );
- }
- else
- {
- set->array[hc] = key;
- break;
- }
- }
- }
-
- free ( flags );
+ // */
+
+ while ( !is_kmer_entity_null ( set->flags, hc ) )
+ {
+ hc++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+
+ clear_kmer_entity_null ( set->flags, hc );
+
+ if ( hc < size && exists_kmer_entity ( flags, hc ) )
+ {
+ tmp = key;
+ key = set->array[hc];
+ set->array[hc] = tmp;
+ set_kmer_entity_del ( flags, hc );
+ }
+ else
+ {
+ set->array[hc] = key;
+ break;
+ }
+ }
+ }
+
+ free ( flags );
}
/*************************************************
@@ -458,61 +458,61 @@ Output:
Return:
0 if it failed to put kmer into kmerset.
*************************************************/
-PUBLIC_FUNC int put_kmerset ( KmerSet * set, Kmer seq, ubyte left, ubyte right, kmer_t ** kmer_p )
+PUBLIC_FUNC int put_kmerset ( KmerSet *set, Kmer seq, ubyte left, ubyte right, kmer_t **kmer_p )
{
- ubyte8 hc;
-
- if ( set->count + 1 > set->max )
- {
- encap_kmerset ( set, 1 );
- }
-
- // U256b temp;
- // temp = Kmer2int256(seq);
- // hc = temp.low % set->size;
- // hc = modular (set, seq);
- // /*
+ ubyte8 hc;
+
+ if ( set->count + 1 > set->max )
+ {
+ encap_kmerset ( set, 1 );
+ }
+
+ // U256b temp;
+ // temp = Kmer2int256(seq);
+ // hc = temp.low % set->size;
+ // hc = modular (set, seq);
+ // /*
#ifdef MER127
- hc = modular ( set, seq );
+ hc = modular ( set, seq );
#else
- __uint128_t temp;
- temp = Kmer2int128 ( seq );
- hc = temp % set->size;
+ __uint128_t temp;
+ temp = Kmer2int128 ( seq );
+ hc = temp % set->size;
#endif
- // */
-
- do
- {
- if ( is_kmer_entity_null ( set->flags, hc ) )
- {
- clear_kmer_entity_null ( set->flags, hc );
- set_new_kmer ( set->array + hc, seq, left, right );
- set->count++;
- *kmer_p = set->array + hc;
- return 0;
- }
- else
- {
- if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
- {
- update_kmer ( set->array + hc, left, right );
- set->array[hc].single = 0;
- *kmer_p = set->array + hc;
- return 1;
- }
- }
-
- hc++;
-
- if ( hc == set->size )
- {
- hc = 0;
- }
- }
- while ( 1 );
-
- *kmer_p = NULL;
- return 0;
+ // */
+
+ do
+ {
+ if ( is_kmer_entity_null ( set->flags, hc ) )
+ {
+ clear_kmer_entity_null ( set->flags, hc );
+ set_new_kmer ( set->array + hc, seq, left, right );
+ set->count++;
+ *kmer_p = set->array + hc;
+ return 0;
+ }
+ else
+ {
+ if ( KmerEqual ( get_kmer_seq ( set->array[hc] ), seq ) )
+ {
+ update_kmer ( set->array + hc, left, right );
+ set->array[hc].single = 0;
+ *kmer_p = set->array + hc;
+ return 1;
+ }
+ }
+
+ hc++;
+
+ if ( hc == set->size )
+ {
+ hc = 0;
+ }
+ }
+ while ( 1 );
+
+ *kmer_p = NULL;
+ return 0;
}
/*************************************************
@@ -527,9 +527,9 @@ Output:
Return:
The kmer number of the kmerset.
*************************************************/
-PUBLIC_FUNC byte8 count_kmerset ( KmerSet * set )
+PUBLIC_FUNC byte8 count_kmerset ( KmerSet *set )
{
- return set->count;
+ return set->count;
}
/*************************************************
@@ -544,33 +544,33 @@ Output:
Return:
None.
*************************************************/
-PUBLIC_FUNC static inline void reset_iter_kmerset ( KmerSet * set )
+PUBLIC_FUNC static inline void reset_iter_kmerset ( KmerSet *set )
{
- set->iter_ptr = 0;
+ set->iter_ptr = 0;
}
-PUBLIC_FUNC static inline ubyte8 iter_kmerset ( KmerSet * set, kmer_t ** rs )
+PUBLIC_FUNC static inline ubyte8 iter_kmerset ( KmerSet *set, kmer_t **rs )
{
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- *rs = set->array + set->iter_ptr;
- set->iter_ptr++;
- return 1;
- }
-
- set->iter_ptr++;
- }
-
- return 0;
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ *rs = set->array + set->iter_ptr;
+ set->iter_ptr++;
+ return 1;
+ }
+
+ set->iter_ptr++;
+ }
+
+ return 0;
}
-PUBLIC_FUNC void free_kmerset ( KmerSet * set )
+PUBLIC_FUNC void free_kmerset ( KmerSet *set )
{
- free ( set->array );
- free ( set->flags );
- free ( set );
+ free ( set->array );
+ free ( set->flags );
+ free ( set );
}
/*************************************************
@@ -586,16 +586,16 @@ Output:
Return:
None.
*************************************************/
-PUBLIC_FUNC void free_Sets ( KmerSet ** sets, int num )
+PUBLIC_FUNC void free_Sets ( KmerSet **sets, int num )
{
- int i;
+ int i;
- for ( i = 0; i < num; i++ )
- {
- free_kmerset ( sets[i] );
- }
+ for ( i = 0; i < num; i++ )
+ {
+ free_kmerset ( sets[i] );
+ }
- free ( ( void * ) sets );
+ free ( ( void * ) sets );
}
/*************************************************
@@ -610,19 +610,19 @@ Output:
Return:
The number of the upstream branches.
*************************************************/
-int count_branch2prev ( kmer_t * node )
+int count_branch2prev ( kmer_t *node )
{
- int num = 0, i;
+ int num = 0, i;
- for ( i = 0; i < 4; i++ )
- {
- if ( get_kmer_left_cov ( *node, i ) > 0 )
- {
- num++;
- }
- }
+ for ( i = 0; i < 4; i++ )
+ {
+ if ( get_kmer_left_cov ( *node, i ) > 0 )
+ {
+ num++;
+ }
+ }
- return num;
+ return num;
}
/*************************************************
@@ -637,19 +637,19 @@ Output:
Return:
The number of the downstream branches.
*************************************************/
-int count_branch2next ( kmer_t * node )
+int count_branch2next ( kmer_t *node )
{
- int num = 0, i;
+ int num = 0, i;
- for ( i = 0; i < 4; i++ )
- {
- if ( get_kmer_right_cov ( *node, i ) > 0 )
- {
- num++;
- }
- }
+ for ( i = 0; i < 4; i++ )
+ {
+ if ( get_kmer_right_cov ( *node, i ) > 0 )
+ {
+ num++;
+ }
+ }
- return num;
+ return num;
}
/*************************************************
@@ -666,16 +666,16 @@ Output:
Return:
None.
*************************************************/
-void dislink2prevUncertain ( kmer_t * node, char ch, boolean smaller )
+void dislink2prevUncertain ( kmer_t *node, char ch, boolean smaller )
{
- if ( smaller )
- {
- set_kmer_left_cov ( *node, ch, 0 );
- }
- else
- {
- set_kmer_right_cov ( *node, int_comp ( ch ), 0 );
- }
+ if ( smaller )
+ {
+ set_kmer_left_cov ( *node, ch, 0 );
+ }
+ else
+ {
+ set_kmer_right_cov ( *node, int_comp ( ch ), 0 );
+ }
}
/*************************************************
@@ -692,14 +692,14 @@ Output:
Return:
None.
*************************************************/
-void dislink2nextUncertain ( kmer_t * node, char ch, boolean smaller )
+void dislink2nextUncertain ( kmer_t *node, char ch, boolean smaller )
{
- if ( smaller )
- {
- set_kmer_right_cov ( *node, ch, 0 );
- }
- else
- {
- set_kmer_left_cov ( *node, int_comp ( ch ), 0 );
- }
+ if ( smaller )
+ {
+ set_kmer_right_cov ( *node, ch, 0 );
+ }
+ else
+ {
+ set_kmer_left_cov ( *node, int_comp ( ch ), 0 );
+ }
}
diff --git a/standardPregraph/node2edge.c b/standardPregraph/node2edge.c
index df5ae73..e49d525 100644
--- a/standardPregraph/node2edge.c
+++ b/standardPregraph/node2edge.c
@@ -1,7 +1,7 @@
/*
* node2edge.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -37,13 +37,13 @@ static int edge_c, edgeCounter; // current edge count number for both st
static preEDGE temp_edge; // for temp use in merge_V2()
static char edge_seq[100000]; //use this static 'edge_seq ' as an temp seq in merge_V2() for speed ..
-static void make_edge ( gzFile * fp );
-static void merge_linearV2 ( char bal_edge, STACK * nStack, int count, gzFile * fp );
-static int check_iden_kmerList ( STACK * stack1, STACK * stack2 );
+static void make_edge ( gzFile *fp );
+static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp );
+static int check_iden_kmerList ( STACK *stack1, STACK *stack2 );
//for stack
-static STACK * nodeStack; //the stack for storing linear nodes
-static STACK * bal_nodeStack; // the stack for storing the reverse complemental nodes ..
+static STACK *nodeStack; //the stack for storing linear nodes
+static STACK *bal_nodeStack; // the stack for storing the reverse complemental nodes ..
/*************************************************
@@ -58,15 +58,15 @@ Output:
Return:
None.
*************************************************/
-void kmer2edges ( char * outfile )
+void kmer2edges ( char *outfile )
{
- gzFile * fp;
- char temp[256];
- sprintf ( temp, "%s.edge.gz", outfile );
- fp = gzopen ( temp, "w" );
- make_edge ( fp );
- gzclose ( fp );
- num_ed = edge_c;
+ gzFile *fp;
+ char temp[256];
+ sprintf ( temp, "%s.edge.gz", outfile );
+ fp = gzopen ( temp, "w" );
+ make_edge ( fp );
+ gzclose ( fp );
+ num_ed = edge_c;
}
/*************************************************
@@ -83,138 +83,138 @@ Output:
Return:
None.
*************************************************/
-static void stringBeads ( KMER_PT * firstBead, char nextch, int * node_c )
+static void stringBeads ( KMER_PT *firstBead, char nextch, int *node_c )
{
- boolean smaller, found;
- Kmer tempKmer, bal_word;
- Kmer word = firstBead->kmer;
- ubyte8 hash_ban;
- kmer_t * outgoing_node;
- int nodeCounter = 1, setPicker;
- char ch;
- unsigned char flag;
- KMER_PT * temp_pt, *prev_pt = firstBead;
- word = prev_pt->kmer;
- nodeCounter = 1;
- word = nextKmer ( word, nextch );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
-
- while ( found && ( outgoing_node->linear ) ) // for every node in this line
- {
- nodeCounter++;
- temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
- temp_pt->node = outgoing_node;
- temp_pt->isSmaller = smaller;
-
- if ( smaller )
- {
- temp_pt->kmer = word;
- }
- else
- {
- temp_pt->kmer = bal_word;
- }
-
- prev_pt = temp_pt;
-
- if ( smaller )
- {
- for ( ch = 0; ch < 4; ch++ )
- {
- flag = get_kmer_right_cov ( *outgoing_node, ch );
-
- if ( flag )
- {
- break;
- }
- }
-
- word = nextKmer ( prev_pt->kmer, ch );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
- }
- else
- {
- for ( ch = 0; ch < 4; ch++ )
- {
- flag = get_kmer_left_cov ( *outgoing_node, ch );
-
- if ( flag )
- {
- break;
- }
- }
-
- word = nextKmer ( prev_pt->kmer, int_comp ( ch ) );
- bal_word = reverseComplement ( word, overlaplen );
-
- if ( KmerLarger ( word, bal_word ) )
- {
- tempKmer = bal_word;
- bal_word = word;
- word = tempKmer;
- smaller = 0;
- }
- else
- {
- smaller = 1;
- }
-
- hash_ban = hash_kmer ( word );
- setPicker = hash_ban % thrd_num;
- found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
- }
- }
-
- if ( outgoing_node ) //this is always true
- {
- nodeCounter++;
- temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
- temp_pt->node = outgoing_node;
- temp_pt->isSmaller = smaller;
-
- if ( smaller )
- {
- temp_pt->kmer = word;
- }
- else
- {
- temp_pt->kmer = bal_word;
- }
- }
-
- *node_c = nodeCounter;
+ boolean smaller, found;
+ Kmer tempKmer, bal_word;
+ Kmer word = firstBead->kmer;
+ ubyte8 hash_ban;
+ kmer_t *outgoing_node;
+ int nodeCounter = 1, setPicker;
+ char ch;
+ unsigned char flag;
+ KMER_PT *temp_pt, *prev_pt = firstBead;
+ word = prev_pt->kmer;
+ nodeCounter = 1;
+ word = nextKmer ( word, nextch );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
+
+ while ( found && ( outgoing_node->linear ) ) // for every node in this line
+ {
+ nodeCounter++;
+ temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
+ temp_pt->node = outgoing_node;
+ temp_pt->isSmaller = smaller;
+
+ if ( smaller )
+ {
+ temp_pt->kmer = word;
+ }
+ else
+ {
+ temp_pt->kmer = bal_word;
+ }
+
+ prev_pt = temp_pt;
+
+ if ( smaller )
+ {
+ for ( ch = 0; ch < 4; ch++ )
+ {
+ flag = get_kmer_right_cov ( *outgoing_node, ch );
+
+ if ( flag )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( prev_pt->kmer, ch );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
+ }
+ else
+ {
+ for ( ch = 0; ch < 4; ch++ )
+ {
+ flag = get_kmer_left_cov ( *outgoing_node, ch );
+
+ if ( flag )
+ {
+ break;
+ }
+ }
+
+ word = nextKmer ( prev_pt->kmer, int_comp ( ch ) );
+ bal_word = reverseComplement ( word, overlaplen );
+
+ if ( KmerLarger ( word, bal_word ) )
+ {
+ tempKmer = bal_word;
+ bal_word = word;
+ word = tempKmer;
+ smaller = 0;
+ }
+ else
+ {
+ smaller = 1;
+ }
+
+ hash_ban = hash_kmer ( word );
+ setPicker = hash_ban % thrd_num;
+ found = search_kmerset ( KmerSets[setPicker], word, &outgoing_node );
+ }
+ }
+
+ if ( outgoing_node ) //this is always true
+ {
+ nodeCounter++;
+ temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
+ temp_pt->node = outgoing_node;
+ temp_pt->isSmaller = smaller;
+
+ if ( smaller )
+ {
+ temp_pt->kmer = word;
+ }
+ else
+ {
+ temp_pt->kmer = bal_word;
+ }
+ }
+
+ *node_c = nodeCounter;
}
/*************************************************
@@ -234,121 +234,121 @@ Output:
Return:
0.
*************************************************/
-static int startEdgeFromNode ( kmer_t * node1, gzFile * fp )
+static int startEdgeFromNode ( kmer_t *node1, gzFile *fp )
{
- int node_c, palindrome;
- unsigned char flag;
- KMER_PT * ite_pt, *temp_pt;
- Kmer word1, bal_word1;
- char ch1;
- /*
- if (node1->linear || node1->deleted)
- {
- return 0;
- }
- // */
- // ignore floating loop
- word1 = node1->seq;
- bal_word1 = reverseComplement ( word1, overlaplen );
-
- // linear structure
- for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on outgoing list
- {
- flag = get_kmer_right_cov ( *node1, ch1 );
-
- if ( !flag )
- {
- continue;
- }
-
- emptyStack ( nodeStack );
- temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
- temp_pt->node = node1;
- temp_pt->isSmaller = 1;
- temp_pt->kmer = word1;
- stringBeads ( temp_pt, ch1, &node_c );
-
- //printf("%d nodes\n",node_c);
- if ( node_c < 2 )
- {
- fprintf ( stderr, "%d nodes in this line!!!!!!!!!!!\n", node_c );
- }
- else
- {
- //make a reverse complement node list
- stackBackup ( nodeStack );
- emptyStack ( bal_nodeStack );
-
- while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
- {
- temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
- temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
- }
-
- stackRecover ( nodeStack );
- palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
- stackRecover ( nodeStack );
-
- if ( palindrome )
- {
- merge_linearV2 ( 0, nodeStack, node_c, fp );
- }
- else
- {
- merge_linearV2 ( 1, nodeStack, node_c, fp );
- }
- }
- } //every possible outgoing edges
-
- for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on incoming list
- {
- flag = get_kmer_left_cov ( *node1, ch1 );
-
- if ( !flag )
- {
- continue;
- }
-
- emptyStack ( nodeStack );
- temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
- temp_pt->node = node1;
- temp_pt->isSmaller = 0;
- temp_pt->kmer = bal_word1;
- stringBeads ( temp_pt, int_comp ( ch1 ), &node_c );
-
- if ( node_c < 2 )
- {
- fprintf ( stderr, "%d nodes in this line!!!!!!!!!!!\n", node_c );
- }
- else
- {
- //make a reverse complement node list
- stackBackup ( nodeStack );
- emptyStack ( bal_nodeStack );
-
- while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
- {
- temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
- temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
- }
-
- stackRecover ( nodeStack );
- palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
- stackRecover ( nodeStack );
-
- if ( palindrome )
- {
- merge_linearV2 ( 0, nodeStack, node_c, fp );
- //printf("edge is palindrome with length %d\n",temp_edge.length);
- }
- else
- {
- merge_linearV2 ( 1, nodeStack, node_c, fp );
- }
- }
- } //every possible incoming edges
-
- return 0;
+ int node_c, palindrome;
+ unsigned char flag;
+ KMER_PT *ite_pt, *temp_pt;
+ Kmer word1, bal_word1;
+ char ch1;
+ /*
+ if (node1->linear || node1->deleted)
+ {
+ return 0;
+ }
+ // */
+ // ignore floating loop
+ word1 = node1->seq;
+ bal_word1 = reverseComplement ( word1, overlaplen );
+
+ // linear structure
+ for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on outgoing list
+ {
+ flag = get_kmer_right_cov ( *node1, ch1 );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ emptyStack ( nodeStack );
+ temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
+ temp_pt->node = node1;
+ temp_pt->isSmaller = 1;
+ temp_pt->kmer = word1;
+ stringBeads ( temp_pt, ch1, &node_c );
+
+ //printf("%d nodes\n",node_c);
+ if ( node_c < 2 )
+ {
+ fprintf ( stderr, "%d nodes in this line!!!!!!!!!!!\n", node_c );
+ }
+ else
+ {
+ //make a reverse complement node list
+ stackBackup ( nodeStack );
+ emptyStack ( bal_nodeStack );
+
+ while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
+ {
+ temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
+ temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
+ }
+
+ stackRecover ( nodeStack );
+ palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
+ stackRecover ( nodeStack );
+
+ if ( palindrome )
+ {
+ merge_linearV2 ( 0, nodeStack, node_c, fp );
+ }
+ else
+ {
+ merge_linearV2 ( 1, nodeStack, node_c, fp );
+ }
+ }
+ } //every possible outgoing edges
+
+ for ( ch1 = 0; ch1 < 4; ch1++ ) // for every node on incoming list
+ {
+ flag = get_kmer_left_cov ( *node1, ch1 );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ emptyStack ( nodeStack );
+ temp_pt = ( KMER_PT * ) stackPush ( nodeStack );
+ temp_pt->node = node1;
+ temp_pt->isSmaller = 0;
+ temp_pt->kmer = bal_word1;
+ stringBeads ( temp_pt, int_comp ( ch1 ), &node_c );
+
+ if ( node_c < 2 )
+ {
+ fprintf ( stderr, "%d nodes in this line!!!!!!!!!!!\n", node_c );
+ }
+ else
+ {
+ //make a reverse complement node list
+ stackBackup ( nodeStack );
+ emptyStack ( bal_nodeStack );
+
+ while ( ( ite_pt = ( KMER_PT * ) stackPop ( nodeStack ) ) != NULL )
+ {
+ temp_pt = ( KMER_PT * ) stackPush ( bal_nodeStack );
+ temp_pt->kmer = reverseComplement ( ite_pt->kmer, overlaplen );
+ }
+
+ stackRecover ( nodeStack );
+ palindrome = check_iden_kmerList ( nodeStack, bal_nodeStack );
+ stackRecover ( nodeStack );
+
+ if ( palindrome )
+ {
+ merge_linearV2 ( 0, nodeStack, node_c, fp );
+ //printf("edge is palindrome with length %d\n",temp_edge.length);
+ }
+ else
+ {
+ merge_linearV2 ( 1, nodeStack, node_c, fp );
+ }
+ }
+ } //every possible incoming edges
+
+ return 0;
}
/*************************************************
@@ -363,51 +363,51 @@ Output:
Return:
None.
*************************************************/
-void make_edge ( gzFile * fp )
+void make_edge ( gzFile *fp )
{
- int i = 0;
- kmer_t * node1;
- KmerSet * set;
- KmerSetsPatch = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- KmerSetsPatch[i] = init_kmerset ( 1000, K_LOAD_FACTOR );
- }
-
- nodeStack = ( STACK * ) createStack ( KMERPTBLOCKSIZE, sizeof ( KMER_PT ) );
- bal_nodeStack = ( STACK * ) createStack ( KMERPTBLOCKSIZE, sizeof ( KMER_PT ) );
- edge_c = nodeCounter = 0;
- edgeCounter = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- set = KmerSets[i];
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- node1 = set->array + set->iter_ptr;
-
- // /*
- if ( !node1->linear && !node1->deleted )
- {
- startEdgeFromNode ( node1, fp );
- }
-
- // */
- // startEdgeFromNode (node1, fp);
- }
-
- set->iter_ptr++;
- }
- }
-
- fprintf ( stderr, "%d (%d) edge(s) and %d extra node(s) constructed.\n", edge_c, edgeCounter, nodeCounter );
- freeStack ( nodeStack );
- freeStack ( bal_nodeStack );
+ int i = 0;
+ kmer_t *node1;
+ KmerSet *set;
+ KmerSetsPatch = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ KmerSetsPatch[i] = init_kmerset ( 1000, K_LOAD_FACTOR );
+ }
+
+ nodeStack = ( STACK * ) createStack ( KMERPTBLOCKSIZE, sizeof ( KMER_PT ) );
+ bal_nodeStack = ( STACK * ) createStack ( KMERPTBLOCKSIZE, sizeof ( KMER_PT ) );
+ edge_c = nodeCounter = 0;
+ edgeCounter = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ set = KmerSets[i];
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ node1 = set->array + set->iter_ptr;
+
+ // /*
+ if ( !node1->linear && !node1->deleted )
+ {
+ startEdgeFromNode ( node1, fp );
+ }
+
+ // */
+ // startEdgeFromNode (node1, fp);
+ }
+
+ set->iter_ptr++;
+ }
+ }
+
+ fprintf ( stderr, "%d (%d) edge(s) and %d extra node(s) constructed.\n", edge_c, edgeCounter, nodeCounter );
+ freeStack ( nodeStack );
+ freeStack ( bal_nodeStack );
}
@@ -427,185 +427,185 @@ Output:
Return:
None.
*************************************************/
-static void merge_linearV2 ( char bal_edge, STACK * nStack, int count, gzFile * fp )
+static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp )
{
- int length, char_index;
- preEDGE * newedge;
- kmer_t * del_node, *longNode;
- char * tightSeq, firstCh;
- long long symbol = 0;
- int len_tSeq;
- Kmer wordplus, bal_wordplus;
- ubyte8 hash_ban;
- KMER_PT * last_np = ( KMER_PT * ) stackPop ( nStack );
- KMER_PT * second_last_np = ( KMER_PT * ) stackPop ( nStack );
- KMER_PT * first_np, *second_np = NULL;
- KMER_PT * temp;
- boolean found;
- int setPicker;
- length = count - 1;
- len_tSeq = length;
-
- if ( len_tSeq >= edge_length_limit )
- {
- tightSeq = ( char * ) ckalloc ( len_tSeq * sizeof ( char ) );
- }
- else
- {
- tightSeq = edge_seq;
- }
-
- char_index = length - 1;
- newedge = &temp_edge;
- newedge->to_node = last_np->kmer;
- newedge->length = length;
- newedge->bal_edge = bal_edge;
- tightSeq[char_index--] = lastCharInKmer ( last_np->kmer );
- firstCh = firstCharInKmer ( second_last_np->kmer );
- dislink2prevUncertain ( last_np->node, firstCh, last_np->isSmaller );
- stackRecover ( nStack );
-
- while ( nStack->item_c > 1 )
- {
- second_np = ( KMER_PT * ) stackPop ( nStack );
- }
-
- first_np = ( KMER_PT * ) stackPop ( nStack );
- //unlink first node to the second one
- dislink2nextUncertain ( first_np->node, lastCharInKmer ( second_np->kmer ), first_np->isSmaller );
- //printf("from %llx, to %llx\n",first_np->node->seq,last_np->node->seq);
- //now temp is the last node in line, out_node is the second last node in line
- newedge->from_node = first_np->kmer;
-
- //create a long kmer for edge with length 1
- if ( length == 1 )
- {
- nodeCounter++;
- wordplus = KmerPlus ( newedge->from_node, lastCharInKmer ( newedge->to_node ) );
- bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
- /*
- Kmer temp = KmerPlus(reverseComplement(newedge->to_node,overlaplen),
- lastCharInKmer(reverseComplement(newedge->from_node,overlaplen)));
- fprintf(stderr,"(%llx %llx) (%llx %llx) (%llx %llx)\n",
- wordplus.high,wordplus.low,temp.high,temp.low,
- bal_wordplus.high,bal_wordplus.low);
- */
- edge_c++;
- edgeCounter++;
-
- if ( KmerSmaller ( wordplus, bal_wordplus ) )
- {
- hash_ban = hash_kmer ( wordplus );
- setPicker = hash_ban % thrd_num;
- found = put_kmerset ( KmerSetsPatch[setPicker], wordplus, 4, 4, &longNode );
-
- if ( found )
- {
- fprintf ( stderr, "LongNode " );
- PrintKmer ( stderr, wordplus );
- fprintf ( stderr, " already exist.\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"longNode %llx %llx %llx %llx already exist\n", wordplus.high1, wordplus.low1, wordplus.high2, wordplus.low2);
- #else
- fprintf (stderr,"longNode %llx %llx already exist\n", wordplus.high, wordplus.low);
- #endif
- */
- }
-
- longNode->l_links = edge_c;
- longNode->twin = ( unsigned char ) ( bal_edge + 1 );
- }
- else
- {
- hash_ban = hash_kmer ( bal_wordplus );
- setPicker = hash_ban % thrd_num;
- found = put_kmerset ( KmerSetsPatch[setPicker], bal_wordplus, 4, 4, &longNode );
-
- if ( found )
- {
- fprintf ( stderr, "LongNode " );
- PrintKmer ( stderr, wordplus );
- fprintf ( stderr, " already exist.\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"longNode %llx %llx %llx %llx already exist\n", wordplus.high1, wordplus.low1, wordplus.high2, wordplus.low2);
- #else
- fprintf (stderr,"longNode %llx %llx already exist\n", bal_wordplus.high, bal_wordplus.low);
- #endif
- */
- }
-
- longNode->l_links = edge_c + bal_edge;
- longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
- }
- }
- else
- {
- edge_c++;
- edgeCounter++;
- }
-
- stackRecover ( nStack );
- //mark all the internal nodes
- temp = ( KMER_PT * ) stackPop ( nStack );
-
- while ( nStack->item_c > 1 )
- {
- temp = ( KMER_PT * ) stackPop ( nStack );
- del_node = temp->node;
- del_node->inEdge = 1;
- symbol += get_kmer_left_covs ( *del_node );
- tightSeq[char_index--] = lastCharInKmer ( temp->kmer );
- }
-
- stackRecover ( nStack );
- temp = ( KMER_PT * ) stackPop ( nStack );
-
- while ( nStack->item_c > 1 )
- {
- temp = ( KMER_PT * ) stackPop ( nStack );
- del_node = temp->node;
- del_node->inEdge = 1;
-
- if ( temp->isSmaller )
- {
- del_node->l_links = edge_c;
- del_node->twin = ( unsigned char ) ( bal_edge + 1 );
- }
- else
- {
- del_node->l_links = edge_c + bal_edge;
- del_node->twin = ( unsigned char ) ( -bal_edge + 1 );
- }
- }
-
- newedge->seq = tightSeq;
-
- if ( length > 1 )
- {
- newedge->cvg = symbol / ( length - 1 ) * 10 > MaxEdgeCov ? MaxEdgeCov : symbol / ( length - 1 ) * 10;
- }
- else
- {
- newedge->cvg = 0;
- }
-
- output_1edge ( newedge, fp );
-
- if ( len_tSeq >= edge_length_limit )
- {
- free ( ( void * ) tightSeq );
- }
-
- edge_c += bal_edge;
-
- if ( edge_c % 10000000 == 0 )
- {
- fprintf ( stderr, "--- %d edge(s) built.\n", edge_c );
- }
-
- return;
+ int length, char_index;
+ preEDGE *newedge;
+ kmer_t *del_node, *longNode;
+ char *tightSeq, firstCh;
+ long long symbol = 0;
+ int len_tSeq;
+ Kmer wordplus, bal_wordplus;
+ ubyte8 hash_ban;
+ KMER_PT *last_np = ( KMER_PT * ) stackPop ( nStack );
+ KMER_PT *second_last_np = ( KMER_PT * ) stackPop ( nStack );
+ KMER_PT *first_np, *second_np = NULL;
+ KMER_PT *temp;
+ boolean found;
+ int setPicker;
+ length = count - 1;
+ len_tSeq = length;
+
+ if ( len_tSeq >= edge_length_limit )
+ {
+ tightSeq = ( char * ) ckalloc ( len_tSeq * sizeof ( char ) );
+ }
+ else
+ {
+ tightSeq = edge_seq;
+ }
+
+ char_index = length - 1;
+ newedge = &temp_edge;
+ newedge->to_node = last_np->kmer;
+ newedge->length = length;
+ newedge->bal_edge = bal_edge;
+ tightSeq[char_index--] = lastCharInKmer ( last_np->kmer );
+ firstCh = firstCharInKmer ( second_last_np->kmer );
+ dislink2prevUncertain ( last_np->node, firstCh, last_np->isSmaller );
+ stackRecover ( nStack );
+
+ while ( nStack->item_c > 1 )
+ {
+ second_np = ( KMER_PT * ) stackPop ( nStack );
+ }
+
+ first_np = ( KMER_PT * ) stackPop ( nStack );
+ //unlink first node to the second one
+ dislink2nextUncertain ( first_np->node, lastCharInKmer ( second_np->kmer ), first_np->isSmaller );
+ //printf("from %llx, to %llx\n",first_np->node->seq,last_np->node->seq);
+ //now temp is the last node in line, out_node is the second last node in line
+ newedge->from_node = first_np->kmer;
+
+ //create a long kmer for edge with length 1
+ if ( length == 1 )
+ {
+ nodeCounter++;
+ wordplus = KmerPlus ( newedge->from_node, lastCharInKmer ( newedge->to_node ) );
+ bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
+ /*
+ Kmer temp = KmerPlus(reverseComplement(newedge->to_node,overlaplen),
+ lastCharInKmer(reverseComplement(newedge->from_node,overlaplen)));
+ fprintf(stderr,"(%llx %llx) (%llx %llx) (%llx %llx)\n",
+ wordplus.high,wordplus.low,temp.high,temp.low,
+ bal_wordplus.high,bal_wordplus.low);
+ */
+ edge_c++;
+ edgeCounter++;
+
+ if ( KmerSmaller ( wordplus, bal_wordplus ) )
+ {
+ hash_ban = hash_kmer ( wordplus );
+ setPicker = hash_ban % thrd_num;
+ found = put_kmerset ( KmerSetsPatch[setPicker], wordplus, 4, 4, &longNode );
+
+ if ( found )
+ {
+ fprintf ( stderr, "LongNode " );
+ PrintKmer ( stderr, wordplus );
+ fprintf ( stderr, " already exist.\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"longNode %llx %llx %llx %llx already exist\n", wordplus.high1, wordplus.low1, wordplus.high2, wordplus.low2);
+ #else
+ fprintf (stderr,"longNode %llx %llx already exist\n", wordplus.high, wordplus.low);
+ #endif
+ */
+ }
+
+ longNode->l_links = edge_c;
+ longNode->twin = ( unsigned char ) ( bal_edge + 1 );
+ }
+ else
+ {
+ hash_ban = hash_kmer ( bal_wordplus );
+ setPicker = hash_ban % thrd_num;
+ found = put_kmerset ( KmerSetsPatch[setPicker], bal_wordplus, 4, 4, &longNode );
+
+ if ( found )
+ {
+ fprintf ( stderr, "LongNode " );
+ PrintKmer ( stderr, wordplus );
+ fprintf ( stderr, " already exist.\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"longNode %llx %llx %llx %llx already exist\n", wordplus.high1, wordplus.low1, wordplus.high2, wordplus.low2);
+ #else
+ fprintf (stderr,"longNode %llx %llx already exist\n", bal_wordplus.high, bal_wordplus.low);
+ #endif
+ */
+ }
+
+ longNode->l_links = edge_c + bal_edge;
+ longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
+ }
+ }
+ else
+ {
+ edge_c++;
+ edgeCounter++;
+ }
+
+ stackRecover ( nStack );
+ //mark all the internal nodes
+ temp = ( KMER_PT * ) stackPop ( nStack );
+
+ while ( nStack->item_c > 1 )
+ {
+ temp = ( KMER_PT * ) stackPop ( nStack );
+ del_node = temp->node;
+ del_node->inEdge = 1;
+ symbol += get_kmer_left_covs ( *del_node );
+ tightSeq[char_index--] = lastCharInKmer ( temp->kmer );
+ }
+
+ stackRecover ( nStack );
+ temp = ( KMER_PT * ) stackPop ( nStack );
+
+ while ( nStack->item_c > 1 )
+ {
+ temp = ( KMER_PT * ) stackPop ( nStack );
+ del_node = temp->node;
+ del_node->inEdge = 1;
+
+ if ( temp->isSmaller )
+ {
+ del_node->l_links = edge_c;
+ del_node->twin = ( unsigned char ) ( bal_edge + 1 );
+ }
+ else
+ {
+ del_node->l_links = edge_c + bal_edge;
+ del_node->twin = ( unsigned char ) ( -bal_edge + 1 );
+ }
+ }
+
+ newedge->seq = tightSeq;
+
+ if ( length > 1 )
+ {
+ newedge->cvg = symbol / ( length - 1 ) * 10 > MaxEdgeCov ? MaxEdgeCov : symbol / ( length - 1 ) * 10;
+ }
+ else
+ {
+ newedge->cvg = 0;
+ }
+
+ output_1edge ( newedge, fp );
+
+ if ( len_tSeq >= edge_length_limit )
+ {
+ free ( ( void * ) tightSeq );
+ }
+
+ edge_c += bal_edge;
+
+ if ( edge_c % 10000000 == 0 )
+ {
+ fprintf ( stderr, "--- %d edge(s) built.\n", edge_c );
+ }
+
+ return;
}
/*************************************************
@@ -621,29 +621,29 @@ Output:
Return:
1 if the two statcks are equal.
*************************************************/
-static int check_iden_kmerList ( STACK * stack1, STACK * stack2 )
+static int check_iden_kmerList ( STACK *stack1, STACK *stack2 )
{
- KMER_PT * ite1, *ite2;
-
- if ( !stack1->item_c || !stack2->item_c ) // one of them is empty
- {
- return 0;
- }
-
- while ( ( ite1 = ( KMER_PT * ) stackPop ( stack1 ) ) != NULL && ( ite2 = ( KMER_PT * ) stackPop ( stack2 ) ) != NULL )
- {
- if ( !KmerEqual ( ite1->kmer, ite2->kmer ) )
- {
- return 0;
- }
- }
-
- if ( stack1->item_c || stack2->item_c ) // one of them is not empty
- {
- return 0;
- }
- else
- {
- return 1;
- }
+ KMER_PT *ite1, *ite2;
+
+ if ( !stack1->item_c || !stack2->item_c ) // one of them is empty
+ {
+ return 0;
+ }
+
+ while ( ( ite1 = ( KMER_PT * ) stackPop ( stack1 ) ) != NULL && ( ite2 = ( KMER_PT * ) stackPop ( stack2 ) ) != NULL )
+ {
+ if ( !KmerEqual ( ite1->kmer, ite2->kmer ) )
+ {
+ return 0;
+ }
+ }
+
+ if ( stack1->item_c || stack2->item_c ) // one of them is not empty
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
diff --git a/standardPregraph/orderContig.c b/standardPregraph/orderContig.c
index b79ae61..258f71c 100644
--- a/standardPregraph/orderContig.c
+++ b/standardPregraph/orderContig.c
@@ -34,10 +34,10 @@ static int orienCounter2;
static int throughCounter;
static int breakPointAtRepeat = 0;
-static FILE * snp_fp = NULL;
+static FILE *snp_fp = NULL;
-static DARRAY * solidArray;
-static DARRAY * tempArray;
+static DARRAY *solidArray;
+static DARRAY *tempArray;
static int solidCounter;
@@ -60,21 +60,21 @@ static unsigned int uCntNodeArr2[MaxCntNode];
static int dCntGapArr2[MaxCntNode];
static int uCntGapArr2[MaxCntNode];
-static unsigned int * cntNodeArr;
-static int * cntGapArr;
+static unsigned int *cntNodeArr;
+static int *cntGapArr;
static unsigned int nodesInSubInOrder[MaxNodeInSub];
static int nodeDistanceInOrder[MaxNodeInSub];
-static DARRAY * scaf3, *scaf5;
-static DARRAY * gap3, *gap5;
+static DARRAY *scaf3, *scaf5;
+static DARRAY *gap3, *gap5;
static unsigned int downstreamCTG[MAXCinBetween];
static unsigned int upstreamCTG[MAXCinBetween];
static int dsCtgCounter;
static int usCtgCounter;
-static CONNECT * checkConnect ( unsigned int from_c, unsigned int to_c );
+static CONNECT *checkConnect ( unsigned int from_c, unsigned int to_c );
static int maskPuzzle ( int num_connect, unsigned int contigLen );
static void freezing();
static boolean checkOverlapInBetween ( double tolerance );
@@ -88,7 +88,7 @@ static void smallScaf();
static void clearNewInsFlag();
static void detectBreakScaff();
static void detectBreakScaf();
-static boolean checkSimple ( DARRAY * ctgArray, int count );
+static boolean checkSimple ( DARRAY *ctgArray, int count );
static void checkCircle();
/*************************************************
@@ -103,31 +103,31 @@ Output:
Return:
1 if all files were OK.
*************************************************/
-boolean checkFiles4Scaff ( char * infile )
+boolean checkFiles4Scaff ( char *infile )
{
- char name[7][256];
- boolean filesOK = 1;
- int i = 0;
- sprintf ( name[0], "%s.Arc", infile );
- sprintf ( name[1], "%s.contig", infile );
- sprintf ( name[2], "%s.peGrads", infile );
- sprintf ( name[3], "%s.preGraphBasic", infile );
- sprintf ( name[4], "%s.updated.edge", infile );
- sprintf ( name[5], "%s.readOnContig.gz", infile );
-
- for ( ; i < 6; i++ )
- {
- filesOK = check_file ( name[i] );
-
- if ( !filesOK )
- {
- fprintf ( stderr, "%s: no such file or empty file!\n\n", name[i] );
- return filesOK;
- }
- }
-
- fprintf ( stderr, "Files for scaffold construction are OK.\n\n" );
- return filesOK;
+ char name[7][256];
+ boolean filesOK = 1;
+ int i = 0;
+ sprintf ( name[0], "%s.Arc", infile );
+ sprintf ( name[1], "%s.contig", infile );
+ sprintf ( name[2], "%s.peGrads", infile );
+ sprintf ( name[3], "%s.preGraphBasic", infile );
+ sprintf ( name[4], "%s.updated.edge", infile );
+ sprintf ( name[5], "%s.readOnContig.gz", infile );
+
+ for ( ; i < 6; i++ )
+ {
+ filesOK = check_file ( name[i] );
+
+ if ( !filesOK )
+ {
+ fprintf ( stderr, "%s: no such file or empty file!\n\n", name[i] );
+ return filesOK;
+ }
+ }
+
+ fprintf ( stderr, "Files for scaffold construction are OK.\n\n" );
+ return filesOK;
}
@@ -143,50 +143,56 @@ Output:
Return:
The pointer to connection if only one connection was found.
*************************************************/
-static CONNECT * getBindCnt ( unsigned int ctg )
+static CONNECT *getBindCnt ( unsigned int ctg )
{
- CONNECT * ite_cnt;
- CONNECT * bindCnt = NULL;
- CONNECT * temp_cnt = NULL;
- CONNECT * temp3_cnt = NULL;
- int count = 0;
- int count2 = 0;
- int count3 = 0;
- ite_cnt = contig_array[ctg].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->nextInScaf )
- {
- count++;
- bindCnt = ite_cnt;
- }
-
- if ( ite_cnt->prevInScaf )
- {
- temp_cnt = ite_cnt;
- count2++;
- }
-
- if ( ite_cnt->singleInScaf )
- {
- temp3_cnt = ite_cnt;
- count3++;
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- if ( count == 1 )
- { return bindCnt; }
-
- if ( count == 0 && count2 == 1 )
- { return temp_cnt; }
-
- if ( count == 0 && count2 == 0 && count3 == 1 )
- { return temp3_cnt; }
-
- return NULL;
+ CONNECT *ite_cnt;
+ CONNECT *bindCnt = NULL;
+ CONNECT *temp_cnt = NULL;
+ CONNECT *temp3_cnt = NULL;
+ int count = 0;
+ int count2 = 0;
+ int count3 = 0;
+ ite_cnt = contig_array[ctg].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->nextInScaf )
+ {
+ count++;
+ bindCnt = ite_cnt;
+ }
+
+ if ( ite_cnt->prevInScaf )
+ {
+ temp_cnt = ite_cnt;
+ count2++;
+ }
+
+ if ( ite_cnt->singleInScaf )
+ {
+ temp3_cnt = ite_cnt;
+ count3++;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ if ( count == 1 )
+ {
+ return bindCnt;
+ }
+
+ if ( count == 0 && count2 == 1 )
+ {
+ return temp_cnt;
+ }
+
+ if ( count == 0 && count2 == 0 && count3 == 1 )
+ {
+ return temp3_cnt;
+ }
+
+ return NULL;
}
/*************************************************
@@ -210,63 +216,67 @@ Return:
1 if creation successed.
*************************************************/
static boolean createAnalogousCnt ( unsigned int sourceStart,
- CONNECT * originCnt, int gap,
+ CONNECT *originCnt, int gap,
unsigned int targetStart, unsigned int targetStop )
{
- CONNECT * temp_cnt;
- unsigned int balTargetStart = getTwinCtg ( targetStart );
- unsigned int balTargetStop = getTwinCtg ( targetStop );
- unsigned int balSourceStart = getTwinCtg ( sourceStart );
- unsigned int balSourceStop = getTwinCtg ( originCnt->contigID );
- boolean change_flag = 0;
- int add_weight = originCnt->weight;
-
- if ( gap < GapLowerBound )
- {
- gapCounter++;
- originCnt->deleted = 1;
- temp_cnt = getCntBetween ( balSourceStop, balSourceStart );
- temp_cnt->deleted = 1;
- return change_flag;
- }
-
- int startLen = ( int ) contig_array[targetStart].length;
- int stopLen = ( int ) contig_array[targetStop].length;
-
- if ( gap < -overlaplen )
- {
- if ( ( int ) contig_array[targetStart].length - overlaplen <= -gap && ( int ) contig_array[targetStart].length > 5 * overlaplen )
- {
- unsigned int tmp_id = targetStart;
- targetStart = targetStop;
- targetStop = tmp_id;
- tmp_id = balTargetStart;
- balTargetStart = balTargetStop;
- balTargetStop = tmp_id;
- gap = -gap;
- change_flag = 1;
- }
- else
- {
- gapCounter++;
- return change_flag;
- }
- }
-
- originCnt->deleted = 1;
- temp_cnt = getCntBetween ( balSourceStop, balSourceStart );
- temp_cnt->deleted = 1;
- temp_cnt = add1Connect ( targetStart, targetStop, gap, add_weight, 1 );
-
- if ( temp_cnt )
- { temp_cnt->inherit = 1; }
-
- temp_cnt = add1Connect ( balTargetStop, balTargetStart, gap, add_weight, 1 );
-
- if ( temp_cnt )
- { temp_cnt->inherit = 1; }
-
- return change_flag;
+ CONNECT *temp_cnt;
+ unsigned int balTargetStart = getTwinCtg ( targetStart );
+ unsigned int balTargetStop = getTwinCtg ( targetStop );
+ unsigned int balSourceStart = getTwinCtg ( sourceStart );
+ unsigned int balSourceStop = getTwinCtg ( originCnt->contigID );
+ boolean change_flag = 0;
+ int add_weight = originCnt->weight;
+
+ if ( gap < GapLowerBound )
+ {
+ gapCounter++;
+ originCnt->deleted = 1;
+ temp_cnt = getCntBetween ( balSourceStop, balSourceStart );
+ temp_cnt->deleted = 1;
+ return change_flag;
+ }
+
+ int startLen = ( int ) contig_array[targetStart].length;
+ int stopLen = ( int ) contig_array[targetStop].length;
+
+ if ( gap < -overlaplen )
+ {
+ if ( ( int ) contig_array[targetStart].length - overlaplen <= -gap && ( int ) contig_array[targetStart].length > 5 * overlaplen )
+ {
+ unsigned int tmp_id = targetStart;
+ targetStart = targetStop;
+ targetStop = tmp_id;
+ tmp_id = balTargetStart;
+ balTargetStart = balTargetStop;
+ balTargetStop = tmp_id;
+ gap = -gap;
+ change_flag = 1;
+ }
+ else
+ {
+ gapCounter++;
+ return change_flag;
+ }
+ }
+
+ originCnt->deleted = 1;
+ temp_cnt = getCntBetween ( balSourceStop, balSourceStart );
+ temp_cnt->deleted = 1;
+ temp_cnt = add1Connect ( targetStart, targetStop, gap, add_weight, 1 );
+
+ if ( temp_cnt )
+ {
+ temp_cnt->inherit = 1;
+ }
+
+ temp_cnt = add1Connect ( balTargetStop, balTargetStart, gap, add_weight, 1 );
+
+ if ( temp_cnt )
+ {
+ temp_cnt->inherit = 1;
+ }
+
+ return change_flag;
}
@@ -289,66 +299,78 @@ Return:
*************************************************/
static void add1LongPEcov ( unsigned int fromCtg, unsigned int toCtg, int weight )
{
- //check if they are on the same scaff
- if ( contig_array[fromCtg].from_vt != contig_array[toCtg].from_vt ||
- contig_array[fromCtg].to_vt != contig_array[toCtg].to_vt )
- {
- fprintf ( stderr, "Warning from add1LongPEcov: contig %d and %d not on the same scaffold\n",
- fromCtg, toCtg );
- return;
- }
-
- if ( contig_array[fromCtg].indexInScaf >= contig_array[toCtg].indexInScaf )
- {
- fprintf ( stderr, "Warning from add1LongPEcov: wrong about order between contig %d and %d\n",
- fromCtg, toCtg );
- return;
- }
-
- CONNECT * bindCnt;
- unsigned int prevCtg = fromCtg;
- bindCnt = getBindCnt ( fromCtg );
-
- while ( bindCnt )
- {
- if ( bindCnt->maxGap + weight <= 1000 )
- { bindCnt->maxGap += weight; }
- else
- { bindCnt->maxGap = 1000; }
-
- if ( fromCtg == 0 && toCtg == 0 )
- fprintf ( stderr, "link (%d %d ) covered by link (%d %d), wt %d\n",
- prevCtg, bindCnt->contigID, fromCtg, toCtg, weight );
-
- if ( bindCnt->contigID == toCtg )
- { break; }
-
- prevCtg = bindCnt->contigID;
- bindCnt = bindCnt->nextInScaf;
- }
-
- unsigned int bal_fc = getTwinCtg ( fromCtg );
- unsigned int bal_tc = getTwinCtg ( toCtg );
- bindCnt = getBindCnt ( bal_tc );
- prevCtg = bal_tc;
-
- while ( bindCnt )
- {
- if ( bindCnt->maxGap + weight <= 1000 )
- { bindCnt->maxGap += weight; }
- else
- { bindCnt->maxGap = 1000; }
-
- if ( fromCtg == 0 && toCtg == 0 )
- fprintf ( stderr, "link (%d %d ) covered by link (%d %d), wt %d\n",
- prevCtg, bindCnt->contigID, fromCtg, toCtg, weight );
-
- if ( bindCnt->contigID == bal_fc )
- { return; }
-
- prevCtg = bindCnt->contigID;
- bindCnt = bindCnt->nextInScaf;
- }
+ //check if they are on the same scaff
+ if ( contig_array[fromCtg].from_vt != contig_array[toCtg].from_vt ||
+ contig_array[fromCtg].to_vt != contig_array[toCtg].to_vt )
+ {
+ fprintf ( stderr, "Warning from add1LongPEcov: contig %d and %d not on the same scaffold\n",
+ fromCtg, toCtg );
+ return;
+ }
+
+ if ( contig_array[fromCtg].indexInScaf >= contig_array[toCtg].indexInScaf )
+ {
+ fprintf ( stderr, "Warning from add1LongPEcov: wrong about order between contig %d and %d\n",
+ fromCtg, toCtg );
+ return;
+ }
+
+ CONNECT *bindCnt;
+ unsigned int prevCtg = fromCtg;
+ bindCnt = getBindCnt ( fromCtg );
+
+ while ( bindCnt )
+ {
+ if ( bindCnt->maxGap + weight <= 1000 )
+ {
+ bindCnt->maxGap += weight;
+ }
+ else
+ {
+ bindCnt->maxGap = 1000;
+ }
+
+ if ( fromCtg == 0 && toCtg == 0 )
+ fprintf ( stderr, "link (%d %d ) covered by link (%d %d), wt %d\n",
+ prevCtg, bindCnt->contigID, fromCtg, toCtg, weight );
+
+ if ( bindCnt->contigID == toCtg )
+ {
+ break;
+ }
+
+ prevCtg = bindCnt->contigID;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ unsigned int bal_fc = getTwinCtg ( fromCtg );
+ unsigned int bal_tc = getTwinCtg ( toCtg );
+ bindCnt = getBindCnt ( bal_tc );
+ prevCtg = bal_tc;
+
+ while ( bindCnt )
+ {
+ if ( bindCnt->maxGap + weight <= 1000 )
+ {
+ bindCnt->maxGap += weight;
+ }
+ else
+ {
+ bindCnt->maxGap = 1000;
+ }
+
+ if ( fromCtg == 0 && toCtg == 0 )
+ fprintf ( stderr, "link (%d %d ) covered by link (%d %d), wt %d\n",
+ prevCtg, bindCnt->contigID, fromCtg, toCtg, weight );
+
+ if ( bindCnt->contigID == bal_fc )
+ {
+ return;
+ }
+
+ prevCtg = bindCnt->contigID;
+ bindCnt = bindCnt->nextInScaf;
+ }
}
/*************************************************
@@ -370,242 +392,262 @@ Return:
*************************************************/
static void downSlide()
{
- int len = 0, gap;
- unsigned int i;
- CONNECT * ite_cnt, *bindCnt, *temp_cnt;
- unsigned int bottomCtg, topCtg, bal_i;
- unsigned int targetCtg, bal_target;
- boolean getThrough, orienConflict;
- int slideLen, slideLen2;
- int slideexchange1 = 0, slideexchange2 = 0;
- orienCounter = throughCounter = 0;
- orienCounter2 = 0;
- int slidebreak1 = 0, slidebreak2 = 0, slidebreak = 0, recoverCnt = 0;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].mask || !contig_array[i].downwardConnect )
- { continue; }
-
- bindCnt = getBindCnt ( i );
-
- if ( !bindCnt )
- { continue; }
-
- bal_i = getTwinCtg ( i );
- len = slideLen = 0;
- bottomCtg = i;
-
- //find the last unmasked contig in this binding
- while ( bindCnt->nextInScaf )
- {
- len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
-
- if ( contig_array[bindCnt->contigID].mask == 0 )
- {
- bottomCtg = bindCnt->contigID;
- slideLen = len;
- }
-
- bindCnt = bindCnt->nextInScaf;
- }
-
- len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
-
- if ( contig_array[bindCnt->contigID].mask == 0 || bottomCtg == 0 )
- {
- bottomCtg = bindCnt->contigID;
- slideLen = len;
- }
-
- //check each connetion from long pair ends
- ite_cnt = contig_array[i].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask || ite_cnt->singleInScaf
- || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- targetCtg = ite_cnt->contigID;
-
- if ( contig_array[i].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
- {
- if ( contig_array[i].indexInScaf > contig_array[targetCtg].indexInScaf )
- { orienCounter++; }
- else
- { throughCounter++; }
-
- setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( ( ins_var_idx > 0 ) && ( slideLen > Insert_size * ins_var_idx ) )
- {
- setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
- ite_cnt = ite_cnt->next;
- slidebreak1++;
- continue;
- }
-
- //contig i and targetctg is not in same scaffold
- //check if this connection conflicts with previous scaffold orientationally
- temp_cnt = getBindCnt ( targetCtg );
- orienConflict = 0;
-
- if ( temp_cnt )
- {
- while ( temp_cnt->nextInScaf )
- {
- if ( temp_cnt->contigID == i )
- {
- orienConflict = 1;
- fprintf ( stderr, "Warning from downSlide: still on the same scaff: %d and %d\n"
- , i, targetCtg );
- fprintf ( stderr, "on scaff %d and %d\n",
- contig_array[i].from_vt, contig_array[targetCtg].from_vt );
- fprintf ( stderr, "on bal_scaff %d and %d\n",
- contig_array[bal_target].to_vt, contig_array[bal_i].to_vt );
- break;
- }
-
- temp_cnt = temp_cnt->nextInScaf;
- }
-
- if ( temp_cnt->contigID == i )
- { orienConflict = 1; }
- }
-
- if ( orienConflict )
- {
- orienCounter++;
- orienCounter2++;
- setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- //connection path to i was not found
- //find the most top contig along previous scaffold starting with the target contig of this connection
- bal_target = getTwinCtg ( targetCtg );
- slideLen2 = 0;
-
- if ( contig_array[targetCtg].mask == 0 )
- {
- topCtg = bal_target;
- }
- else
- {
- topCtg = 0;
- }
-
- temp_cnt = getBindCnt ( bal_target );
- getThrough = len = 0;
- int slidebreak = 0;
-
- if ( temp_cnt )
- {
- //find the last contig in this binding
- while ( temp_cnt->nextInScaf )
- {
- //check if this route reaches bal_i
- if ( temp_cnt->contigID == bal_i )
- {
- fprintf ( stderr, "Warning from downSlide: (B) still on the same scaff: %d and %d (%d and %d)\n",
- i, targetCtg, bal_target, bal_i );
- fprintf ( stderr, "on scaff %d and %d\n",
- contig_array[i].from_vt, contig_array[targetCtg].from_vt );
- fprintf ( stderr, "on bal_scaff %d and %d\n",
- contig_array[bal_target].to_vt, contig_array[bal_i].to_vt );
- getThrough = 1;
- break;
- }
-
- len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
-
- if ( contig_array[temp_cnt->contigID].mask == 0 )
- {
- topCtg = temp_cnt->contigID;
- slideLen2 = len;
- }
-
- if ( ( ins_var_idx > 0 ) && ( len > ins_var_idx * Insert_size ) )
- {
- slidebreak = 1;
- break;
- }
-
- temp_cnt = temp_cnt->nextInScaf;
- }
-
- len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
-
- if ( contig_array[temp_cnt->contigID].mask == 0 || topCtg == 0 )
- {
- topCtg = temp_cnt->contigID;
- slideLen2 = len;
- }
-
- if ( slidebreak == 1 )
- {
- setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
- ite_cnt = ite_cnt->next;
- slidebreak2++;
- continue;
- }
-
- if ( temp_cnt->contigID == bal_i )
- { getThrough = 1; }
- else
- { topCtg = getTwinCtg ( topCtg ); }
- }
- else
- { topCtg = targetCtg; }
-
- if ( getThrough )
- {
- throughCounter++;
- setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- //connection path to bal_id was not found
- CONNECT * dh_cnt;
- gap = ite_cnt->gapLen - slideLen - slideLen2;
- dh_cnt = getCntBetween ( topCtg, bottomCtg );
-
- if ( dh_cnt && dh_cnt->weight >= MinWeakCut )
- {
- slideexchange1++;
- setConnectDelete ( topCtg, bottomCtg, 0, 0 );
- setConnectMask ( topCtg, bottomCtg, 0 );
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- //add a connection between bottomCtg and topCtg
- if ( bottomCtg != topCtg && ! ( i == bottomCtg && targetCtg == topCtg ) )
- {
- boolean creat_flag = createAnalogousCnt ( i, ite_cnt, gap, bottomCtg, topCtg );
-
- if ( creat_flag )
- { slideexchange2++; }
-
- if ( contig_array[bottomCtg].mask || contig_array[topCtg].mask )
- { fprintf ( stderr, "downSlide to masked contig, bottomCtg %u[mask %d], topCtg %u[mask %d]\n", bottomCtg, contig_array[bottomCtg].mask, topCtg, contig_array[topCtg].mask ); }
- }
-
- ite_cnt = ite_cnt->next;
- }
- }
-
- // fprintf(stderr,"downSliding stat:\norienConflict\tfall_inside\tslidebreak1\tslidebreak2\trecoverCnt\tslideexchange1\tslideexchange2\n%d\t%d\t%d\t%d\t%d\t%d\t%d\n",orienCounter, throughCounter, slidebreak1, slidebreak2, recoverCnt, slideexchange1, slideexchange2);
- fprintf ( stderr, "Add large insert size PE links: %d orientation-conflict links, %d contigs acrossed by normal links.\n", orienCounter, throughCounter );
+ int len = 0, gap;
+ unsigned int i;
+ CONNECT *ite_cnt, *bindCnt, *temp_cnt;
+ unsigned int bottomCtg, topCtg, bal_i;
+ unsigned int targetCtg, bal_target;
+ boolean getThrough, orienConflict;
+ int slideLen, slideLen2;
+ int slideexchange1 = 0, slideexchange2 = 0;
+ orienCounter = throughCounter = 0;
+ orienCounter2 = 0;
+ int slidebreak1 = 0, slidebreak2 = 0, slidebreak = 0, recoverCnt = 0;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].mask || !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ bindCnt = getBindCnt ( i );
+
+ if ( !bindCnt )
+ {
+ continue;
+ }
+
+ bal_i = getTwinCtg ( i );
+ len = slideLen = 0;
+ bottomCtg = i;
+
+ //find the last unmasked contig in this binding
+ while ( bindCnt->nextInScaf )
+ {
+ len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
+
+ if ( contig_array[bindCnt->contigID].mask == 0 )
+ {
+ bottomCtg = bindCnt->contigID;
+ slideLen = len;
+ }
+
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ len += bindCnt->gapLen + contig_array[bindCnt->contigID].length;
+
+ if ( contig_array[bindCnt->contigID].mask == 0 || bottomCtg == 0 )
+ {
+ bottomCtg = bindCnt->contigID;
+ slideLen = len;
+ }
+
+ //check each connetion from long pair ends
+ ite_cnt = contig_array[i].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask || ite_cnt->singleInScaf
+ || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ targetCtg = ite_cnt->contigID;
+
+ if ( contig_array[i].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
+ {
+ if ( contig_array[i].indexInScaf > contig_array[targetCtg].indexInScaf )
+ {
+ orienCounter++;
+ }
+ else
+ {
+ throughCounter++;
+ }
+
+ setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( ( ins_var_idx > 0 ) && ( slideLen > Insert_size * ins_var_idx ) )
+ {
+ setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
+ ite_cnt = ite_cnt->next;
+ slidebreak1++;
+ continue;
+ }
+
+ //contig i and targetctg is not in same scaffold
+ //check if this connection conflicts with previous scaffold orientationally
+ temp_cnt = getBindCnt ( targetCtg );
+ orienConflict = 0;
+
+ if ( temp_cnt )
+ {
+ while ( temp_cnt->nextInScaf )
+ {
+ if ( temp_cnt->contigID == i )
+ {
+ orienConflict = 1;
+ fprintf ( stderr, "Warning from downSlide: still on the same scaff: %d and %d\n"
+ , i, targetCtg );
+ fprintf ( stderr, "on scaff %d and %d\n",
+ contig_array[i].from_vt, contig_array[targetCtg].from_vt );
+ fprintf ( stderr, "on bal_scaff %d and %d\n",
+ contig_array[bal_target].to_vt, contig_array[bal_i].to_vt );
+ break;
+ }
+
+ temp_cnt = temp_cnt->nextInScaf;
+ }
+
+ if ( temp_cnt->contigID == i )
+ {
+ orienConflict = 1;
+ }
+ }
+
+ if ( orienConflict )
+ {
+ orienCounter++;
+ orienCounter2++;
+ setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ //connection path to i was not found
+ //find the most top contig along previous scaffold starting with the target contig of this connection
+ bal_target = getTwinCtg ( targetCtg );
+ slideLen2 = 0;
+
+ if ( contig_array[targetCtg].mask == 0 )
+ {
+ topCtg = bal_target;
+ }
+ else
+ {
+ topCtg = 0;
+ }
+
+ temp_cnt = getBindCnt ( bal_target );
+ getThrough = len = 0;
+ int slidebreak = 0;
+
+ if ( temp_cnt )
+ {
+ //find the last contig in this binding
+ while ( temp_cnt->nextInScaf )
+ {
+ //check if this route reaches bal_i
+ if ( temp_cnt->contigID == bal_i )
+ {
+ fprintf ( stderr, "Warning from downSlide: (B) still on the same scaff: %d and %d (%d and %d)\n",
+ i, targetCtg, bal_target, bal_i );
+ fprintf ( stderr, "on scaff %d and %d\n",
+ contig_array[i].from_vt, contig_array[targetCtg].from_vt );
+ fprintf ( stderr, "on bal_scaff %d and %d\n",
+ contig_array[bal_target].to_vt, contig_array[bal_i].to_vt );
+ getThrough = 1;
+ break;
+ }
+
+ len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
+
+ if ( contig_array[temp_cnt->contigID].mask == 0 )
+ {
+ topCtg = temp_cnt->contigID;
+ slideLen2 = len;
+ }
+
+ if ( ( ins_var_idx > 0 ) && ( len > ins_var_idx * Insert_size ) )
+ {
+ slidebreak = 1;
+ break;
+ }
+
+ temp_cnt = temp_cnt->nextInScaf;
+ }
+
+ len += temp_cnt->gapLen + contig_array[temp_cnt->contigID].length;
+
+ if ( contig_array[temp_cnt->contigID].mask == 0 || topCtg == 0 )
+ {
+ topCtg = temp_cnt->contigID;
+ slideLen2 = len;
+ }
+
+ if ( slidebreak == 1 )
+ {
+ setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
+ ite_cnt = ite_cnt->next;
+ slidebreak2++;
+ continue;
+ }
+
+ if ( temp_cnt->contigID == bal_i )
+ {
+ getThrough = 1;
+ }
+ else
+ {
+ topCtg = getTwinCtg ( topCtg );
+ }
+ }
+ else
+ {
+ topCtg = targetCtg;
+ }
+
+ if ( getThrough )
+ {
+ throughCounter++;
+ setConnectDelete ( i, ite_cnt->contigID, 1, 0 );
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ //connection path to bal_id was not found
+ CONNECT *dh_cnt;
+ gap = ite_cnt->gapLen - slideLen - slideLen2;
+ dh_cnt = getCntBetween ( topCtg, bottomCtg );
+
+ if ( dh_cnt && dh_cnt->weight >= MinWeakCut )
+ {
+ slideexchange1++;
+ setConnectDelete ( topCtg, bottomCtg, 0, 0 );
+ setConnectMask ( topCtg, bottomCtg, 0 );
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ //add a connection between bottomCtg and topCtg
+ if ( bottomCtg != topCtg && ! ( i == bottomCtg && targetCtg == topCtg ) )
+ {
+ boolean creat_flag = createAnalogousCnt ( i, ite_cnt, gap, bottomCtg, topCtg );
+
+ if ( creat_flag )
+ {
+ slideexchange2++;
+ }
+
+ if ( contig_array[bottomCtg].mask || contig_array[topCtg].mask )
+ {
+ fprintf ( stderr, "downSlide to masked contig, bottomCtg %u[mask %d], topCtg %u[mask %d]\n", bottomCtg, contig_array[bottomCtg].mask, topCtg, contig_array[topCtg].mask );
+ }
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ // fprintf(stderr,"downSliding stat:\norienConflict\tfall_inside\tslidebreak1\tslidebreak2\trecoverCnt\tslideexchange1\tslideexchange2\n%d\t%d\t%d\t%d\t%d\t%d\t%d\n",orienCounter, throughCounter, slidebreak1, slidebreak2, recoverCnt, slideexchange1, slideexchange2);
+ fprintf ( stderr, "Add large insert size PE links: %d orientation-conflict links, %d contigs acrossed by normal links.\n", orienCounter, throughCounter );
}
/*************************************************
@@ -621,34 +663,34 @@ Output:
Return:
1 if setting successed.
*************************************************/
-static boolean setNextInScaf ( CONNECT * cnt, CONNECT * nextCnt )
+static boolean setNextInScaf ( CONNECT *cnt, CONNECT *nextCnt )
{
- if ( !cnt )
- {
- fprintf ( stderr, "setNextInScaf: empty pointer\n" );
- return 0;
- }
-
- if ( !nextCnt )
- {
- cnt->nextInScaf = nextCnt;
- return 1;
- }
-
- if ( cnt->mask || cnt->deleted )
- {
- fprintf ( stderr, "setNextInScaf: cnt is masked or deleted\n" );
- return 0;
- }
-
- if ( nextCnt->deleted || nextCnt->mask )
- {
- fprintf ( stderr, "setNextInScaf: nextCnt is masked or deleted\n" );
- return 0;
- }
-
- cnt->nextInScaf = nextCnt;
- return 1;
+ if ( !cnt )
+ {
+ fprintf ( stderr, "setNextInScaf: empty pointer\n" );
+ return 0;
+ }
+
+ if ( !nextCnt )
+ {
+ cnt->nextInScaf = nextCnt;
+ return 1;
+ }
+
+ if ( cnt->mask || cnt->deleted )
+ {
+ fprintf ( stderr, "setNextInScaf: cnt is masked or deleted\n" );
+ return 0;
+ }
+
+ if ( nextCnt->deleted || nextCnt->mask )
+ {
+ fprintf ( stderr, "setNextInScaf: nextCnt is masked or deleted\n" );
+ return 0;
+ }
+
+ cnt->nextInScaf = nextCnt;
+ return 1;
}
/*************************************************
@@ -664,28 +706,28 @@ Output:
Return:
1 if setting successed.
*************************************************/
-static boolean setPrevInScaf ( CONNECT * cnt, boolean flag )
+static boolean setPrevInScaf ( CONNECT *cnt, boolean flag )
{
- if ( !cnt )
- {
- fprintf ( stderr, "setPrevInScaf: empty pointer\n" );
- return 0;
- }
-
- if ( !flag )
- {
- cnt->prevInScaf = flag;
- return 1;
- }
-
- if ( cnt->mask || cnt->deleted )
- {
- fprintf ( stderr, "setPrevInScaf: cnt is masked or deleted\n" );
- return 0;
- }
-
- cnt->prevInScaf = flag;
- return 1;
+ if ( !cnt )
+ {
+ fprintf ( stderr, "setPrevInScaf: empty pointer\n" );
+ return 0;
+ }
+
+ if ( !flag )
+ {
+ cnt->prevInScaf = flag;
+ return 1;
+ }
+
+ if ( cnt->mask || cnt->deleted )
+ {
+ fprintf ( stderr, "setPrevInScaf: cnt is masked or deleted\n" );
+ return 0;
+ }
+
+ cnt->prevInScaf = flag;
+ return 1;
}
@@ -707,36 +749,38 @@ Output:
Return:
None.
*************************************************/
-static void substitueUSinScaf ( CONNECT * origin, unsigned int from_c_new )
+static void substitueUSinScaf ( CONNECT *origin, unsigned int from_c_new )
{
- if ( !origin || !origin->nextInScaf )
- { return; }
-
- unsigned int branch_c, to_c;
- unsigned int bal_branch_c, bal_to_c;
- unsigned int bal_from_c_new = getTwinCtg ( from_c_new );
- CONNECT * bal_origin, *bal_nextCNT, *prevCNT, *bal_prevCNT;
- branch_c = origin->contigID;
- to_c = origin->nextInScaf->contigID;
- bal_branch_c = getTwinCtg ( branch_c );
- bal_to_c = getTwinCtg ( to_c );
- prevCNT = checkConnect ( from_c_new, branch_c );
- bal_nextCNT = checkConnect ( bal_to_c, bal_branch_c );
-
- if ( !bal_nextCNT )
- {
- fprintf ( stderr, "substitueUSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c );
- return;
- }
-
- bal_origin = bal_nextCNT->nextInScaf;
- bal_prevCNT = checkConnect ( bal_branch_c, bal_from_c_new );
- setPrevInScaf ( bal_nextCNT->nextInScaf, 0 );
- setNextInScaf ( prevCNT, origin->nextInScaf );
- setNextInScaf ( bal_nextCNT, bal_prevCNT );
- setPrevInScaf ( bal_prevCNT, 1 );
- setNextInScaf ( origin, NULL );
- setPrevInScaf ( bal_origin, 0 );
+ if ( !origin || !origin->nextInScaf )
+ {
+ return;
+ }
+
+ unsigned int branch_c, to_c;
+ unsigned int bal_branch_c, bal_to_c;
+ unsigned int bal_from_c_new = getTwinCtg ( from_c_new );
+ CONNECT *bal_origin, *bal_nextCNT, *prevCNT, *bal_prevCNT;
+ branch_c = origin->contigID;
+ to_c = origin->nextInScaf->contigID;
+ bal_branch_c = getTwinCtg ( branch_c );
+ bal_to_c = getTwinCtg ( to_c );
+ prevCNT = checkConnect ( from_c_new, branch_c );
+ bal_nextCNT = checkConnect ( bal_to_c, bal_branch_c );
+
+ if ( !bal_nextCNT )
+ {
+ fprintf ( stderr, "substitueUSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c );
+ return;
+ }
+
+ bal_origin = bal_nextCNT->nextInScaf;
+ bal_prevCNT = checkConnect ( bal_branch_c, bal_from_c_new );
+ setPrevInScaf ( bal_nextCNT->nextInScaf, 0 );
+ setNextInScaf ( prevCNT, origin->nextInScaf );
+ setNextInScaf ( bal_nextCNT, bal_prevCNT );
+ setPrevInScaf ( bal_prevCNT, 1 );
+ setNextInScaf ( origin, NULL );
+ setPrevInScaf ( bal_origin, 0 );
}
@@ -759,47 +803,51 @@ Output:
Return:
None.
*************************************************/
-static void substitueDSinScaf ( CONNECT * origin, unsigned int branch_c, unsigned int to_c_new )
+static void substitueDSinScaf ( CONNECT *origin, unsigned int branch_c, unsigned int to_c_new )
{
- if ( !origin || !origin->prevInScaf )
- { return; }
-
- unsigned int to_c;
- unsigned int bal_branch_c, bal_to_c, bal_to_c_new;
- unsigned int from_c, bal_from_c;
- CONNECT * bal_origin, *prevCNT, *bal_prevCNT;
- CONNECT * nextCNT, *bal_nextCNT;
- to_c = origin->contigID;
- bal_branch_c = getTwinCtg ( branch_c );
- bal_to_c = getTwinCtg ( to_c );
- bal_origin = getCntBetween ( bal_to_c, bal_branch_c );
-
- if ( !bal_origin )
- {
- fprintf ( stderr, "substitueDSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c );
- return;
- }
-
- if ( bal_origin->nextInScaf )
- { bal_from_c = bal_origin->nextInScaf->contigID; }
- else
- {
- fprintf ( stderr, "next null! %d\t%d\n", bal_to_c, bal_branch_c );
- exit ( 3 );
- }
-
- bal_from_c = bal_origin->nextInScaf->contigID;
- from_c = getTwinCtg ( bal_from_c );
- bal_to_c_new = getTwinCtg ( to_c_new );
- prevCNT = checkConnect ( from_c, branch_c );
- nextCNT = checkConnect ( branch_c, to_c_new );
- setNextInScaf ( prevCNT, nextCNT );
- setPrevInScaf ( nextCNT, 1 );
- bal_nextCNT = checkConnect ( bal_to_c_new, bal_branch_c );
- bal_prevCNT = checkConnect ( bal_branch_c, bal_from_c );
- setNextInScaf ( bal_nextCNT, bal_prevCNT );
- setPrevInScaf ( origin, 0 );
- setNextInScaf ( bal_origin, NULL );
+ if ( !origin || !origin->prevInScaf )
+ {
+ return;
+ }
+
+ unsigned int to_c;
+ unsigned int bal_branch_c, bal_to_c, bal_to_c_new;
+ unsigned int from_c, bal_from_c;
+ CONNECT *bal_origin, *prevCNT, *bal_prevCNT;
+ CONNECT *nextCNT, *bal_nextCNT;
+ to_c = origin->contigID;
+ bal_branch_c = getTwinCtg ( branch_c );
+ bal_to_c = getTwinCtg ( to_c );
+ bal_origin = getCntBetween ( bal_to_c, bal_branch_c );
+
+ if ( !bal_origin )
+ {
+ fprintf ( stderr, "substitueDSinScaf: no connect between %d and %d\n", bal_to_c, bal_branch_c );
+ return;
+ }
+
+ if ( bal_origin->nextInScaf )
+ {
+ bal_from_c = bal_origin->nextInScaf->contigID;
+ }
+ else
+ {
+ fprintf ( stderr, "next null! %d\t%d\n", bal_to_c, bal_branch_c );
+ exit ( 3 );
+ }
+
+ bal_from_c = bal_origin->nextInScaf->contigID;
+ from_c = getTwinCtg ( bal_from_c );
+ bal_to_c_new = getTwinCtg ( to_c_new );
+ prevCNT = checkConnect ( from_c, branch_c );
+ nextCNT = checkConnect ( branch_c, to_c_new );
+ setNextInScaf ( prevCNT, nextCNT );
+ setPrevInScaf ( nextCNT, 1 );
+ bal_nextCNT = checkConnect ( bal_to_c_new, bal_branch_c );
+ bal_prevCNT = checkConnect ( bal_branch_c, bal_from_c );
+ setNextInScaf ( bal_nextCNT, bal_prevCNT );
+ setPrevInScaf ( origin, 0 );
+ setNextInScaf ( bal_origin, NULL );
}
/*************************************************
@@ -820,28 +868,34 @@ Return:
1 if contig had upstream conntcion and downstream connection.
Non-deleted and non-masked connections number otherwise.
*************************************************/
-static int validConnect ( unsigned int ctg, CONNECT * preCNT )
+static int validConnect ( unsigned int ctg, CONNECT *preCNT )
{
- if ( preCNT && preCNT->nextInScaf )
- { return 1; }
+ if ( preCNT && preCNT->nextInScaf )
+ {
+ return 1;
+ }
- CONNECT * cn_temp;
- int count = 0;
+ CONNECT *cn_temp;
+ int count = 0;
- if ( !contig_array[ctg].downwardConnect )
- { return count; }
+ if ( !contig_array[ctg].downwardConnect )
+ {
+ return count;
+ }
- cn_temp = contig_array[ctg].downwardConnect;
+ cn_temp = contig_array[ctg].downwardConnect;
- while ( cn_temp )
- {
- if ( !cn_temp->deleted && !cn_temp->mask )
- { count++; }
+ while ( cn_temp )
+ {
+ if ( !cn_temp->deleted && !cn_temp->mask )
+ {
+ count++;
+ }
- cn_temp = cn_temp->next;
- }
+ cn_temp = cn_temp->next;
+ }
- return count;
+ return count;
}
/*************************************************
@@ -861,78 +915,94 @@ Output:
Return:
Pointer to qualified connection or NULL.
*************************************************/
-static CONNECT * getNextContig ( unsigned int ctg, CONNECT * preCNT, boolean * exception )
+static CONNECT *getNextContig ( unsigned int ctg, CONNECT *preCNT, boolean *exception )
{
- CONNECT * cn_temp, *retCNT = NULL, *dh_cnt;
- int count = 0, valid_in;
- unsigned int nextCtg, bal_ctg;
- *exception = 0;
-
- if ( preCNT && preCNT->nextInScaf )
- {
- if ( preCNT->contigID != ctg )
- { fprintf ( stderr, "pre cnt does not lead to %d\n", ctg ); }
-
- nextCtg = preCNT->nextInScaf->contigID;
- cn_temp = getCntBetween ( ctg, nextCtg );
- dh_cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( ctg ) );
-
- if ( cn_temp && ( cn_temp->mask || cn_temp->deleted ) )
- {
- int id1 = 0, id2 = 0;
-
- if ( dh_cnt->nextInScaf )
- {
- id1 = dh_cnt->nextInScaf->contigID;
- id2 = getTwinCtg ( dh_cnt->nextInScaf->contigID );
- }
-
- if ( !cn_temp->prevInScaf )
- { fprintf ( stderr, "not even has a prevInScaf %d and %d, %d and %d with before %d with twin %d\n", ctg, nextCtg, getTwinCtg ( nextCtg ), getTwinCtg ( ctg ), id1, id2 ); }
-
- cn_temp = getCntBetween ( getTwinCtg ( nextCtg ),
- getTwinCtg ( ctg ) );
-
- if ( !cn_temp->nextInScaf )
- { fprintf ( stderr, "its twin cnt not has a nextInScaf\n" ); }
-
- fflush ( stdout );
- *exception = 1;
- }
- else
- { return preCNT->nextInScaf; }
- }
-
- bal_ctg = getTwinCtg ( ctg );
- valid_in = validConnect ( bal_ctg, NULL );
-
- if ( valid_in > 1 )
- { return NULL; }
-
- if ( !contig_array[ctg].downwardConnect )
- { return NULL; }
-
- cn_temp = contig_array[ctg].downwardConnect;
-
- while ( cn_temp )
- {
- if ( cn_temp->mask || cn_temp->deleted )
- {
- cn_temp = cn_temp->next;
- continue;
- }
-
- count++;
-
- if ( count == 1 )
- { retCNT = cn_temp; }
- else if ( count == 2 )
- { return NULL; }
-
- cn_temp = cn_temp->next;
- }
-
- return retCNT;
+ CONNECT *cn_temp, *retCNT = NULL, *dh_cnt;
+ int count = 0, valid_in;
+ unsigned int nextCtg, bal_ctg;
+ *exception = 0;
+
+ if ( preCNT && preCNT->nextInScaf )
+ {
+ if ( preCNT->contigID != ctg )
+ {
+ fprintf ( stderr, "pre cnt does not lead to %d\n", ctg );
+ }
+
+ nextCtg = preCNT->nextInScaf->contigID;
+ cn_temp = getCntBetween ( ctg, nextCtg );
+ dh_cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( ctg ) );
+
+ if ( cn_temp && ( cn_temp->mask || cn_temp->deleted ) )
+ {
+ int id1 = 0, id2 = 0;
+
+ if ( dh_cnt->nextInScaf )
+ {
+ id1 = dh_cnt->nextInScaf->contigID;
+ id2 = getTwinCtg ( dh_cnt->nextInScaf->contigID );
+ }
+
+ if ( !cn_temp->prevInScaf )
+ {
+ fprintf ( stderr, "not even has a prevInScaf %d and %d, %d and %d with before %d with twin %d\n", ctg, nextCtg, getTwinCtg ( nextCtg ), getTwinCtg ( ctg ), id1, id2 );
+ }
+
+ cn_temp = getCntBetween ( getTwinCtg ( nextCtg ),
+ getTwinCtg ( ctg ) );
+
+ if ( !cn_temp->nextInScaf )
+ {
+ fprintf ( stderr, "its twin cnt not has a nextInScaf\n" );
+ }
+
+ fflush ( stdout );
+ *exception = 1;
+ }
+ else
+ {
+ return preCNT->nextInScaf;
+ }
+ }
+
+ bal_ctg = getTwinCtg ( ctg );
+ valid_in = validConnect ( bal_ctg, NULL );
+
+ if ( valid_in > 1 )
+ {
+ return NULL;
+ }
+
+ if ( !contig_array[ctg].downwardConnect )
+ {
+ return NULL;
+ }
+
+ cn_temp = contig_array[ctg].downwardConnect;
+
+ while ( cn_temp )
+ {
+ if ( cn_temp->mask || cn_temp->deleted )
+ {
+ cn_temp = cn_temp->next;
+ continue;
+ }
+
+ count++;
+
+ if ( count == 1 )
+ {
+ retCNT = cn_temp;
+ }
+ else if ( count == 2 )
+ {
+ return NULL;
+ }
+
+ cn_temp = cn_temp->next;
+ }
+
+ return retCNT;
}
/*************************************************
@@ -949,19 +1019,23 @@ Output:
Return:
Pointer to qualified connection or NULL.
*************************************************/
-static CONNECT * checkConnect ( unsigned int from_c, unsigned int to_c )
+static CONNECT *checkConnect ( unsigned int from_c, unsigned int to_c )
{
- CONNECT * cn_temp = getCntBetween ( from_c, to_c );
+ CONNECT *cn_temp = getCntBetween ( from_c, to_c );
- if ( !cn_temp )
- { return NULL; }
+ if ( !cn_temp )
+ {
+ return NULL;
+ }
- if ( !cn_temp->mask && !cn_temp->deleted )
- { return cn_temp; }
+ if ( !cn_temp->mask && !cn_temp->deleted )
+ {
+ return cn_temp;
+ }
- //else
- //printf("masked or deleted: %d\t%d\t%d\t%d\n",from_c,to_c,cn_temp->mask,cn_temp->deleted);
- return NULL;
+ //else
+ //printf("masked or deleted: %d\t%d\t%d\t%d\n",from_c,to_c,cn_temp->mask,cn_temp->deleted);
+ return NULL;
}
/*************************************************
@@ -982,49 +1056,51 @@ Return:
*************************************************/
static int setConnectMask ( unsigned int from_c, unsigned int to_c, char mask )
{
- CONNECT * cn_temp, *cn_bal, *cn_ds, *cn_us;
- unsigned int bal_fc = getTwinCtg ( from_c );
- unsigned int bal_tc = getTwinCtg ( to_c );
- unsigned int ctg3, bal_ctg3;
- cn_temp = getCntBetween ( from_c, to_c );
- cn_bal = getCntBetween ( bal_tc, bal_fc );
-
- if ( !cn_temp || !cn_bal )
- {
- return 0;
- }
-
- cn_temp->mask = mask;
- cn_bal->mask = mask;
-
- if ( !mask )
- { return 1; }
-
- if ( cn_temp->nextInScaf ) //undo the binding
- {
- setPrevInScaf ( cn_temp->nextInScaf, 0 );
- ctg3 = cn_temp->nextInScaf->contigID;
- setNextInScaf ( cn_temp, NULL );
- bal_ctg3 = getTwinCtg ( ctg3 );
- cn_ds = getCntBetween ( bal_ctg3, bal_tc );
- setNextInScaf ( cn_ds, NULL );
- setPrevInScaf ( cn_bal, 0 );
- }
-
- // ctg3 -> from_c -> to_c
- // bal_ctg3 <- bal_fc <- bal_tc
- if ( cn_bal->nextInScaf )
- {
- setPrevInScaf ( cn_bal->nextInScaf, 0 );
- bal_ctg3 = cn_bal->nextInScaf->contigID;
- setNextInScaf ( cn_bal, NULL );
- ctg3 = getTwinCtg ( bal_ctg3 );
- cn_us = getCntBetween ( ctg3, from_c );
- setNextInScaf ( cn_us, NULL );
- setPrevInScaf ( cn_temp, 0 );
- }
-
- return 1;
+ CONNECT *cn_temp, *cn_bal, *cn_ds, *cn_us;
+ unsigned int bal_fc = getTwinCtg ( from_c );
+ unsigned int bal_tc = getTwinCtg ( to_c );
+ unsigned int ctg3, bal_ctg3;
+ cn_temp = getCntBetween ( from_c, to_c );
+ cn_bal = getCntBetween ( bal_tc, bal_fc );
+
+ if ( !cn_temp || !cn_bal )
+ {
+ return 0;
+ }
+
+ cn_temp->mask = mask;
+ cn_bal->mask = mask;
+
+ if ( !mask )
+ {
+ return 1;
+ }
+
+ if ( cn_temp->nextInScaf ) //undo the binding
+ {
+ setPrevInScaf ( cn_temp->nextInScaf, 0 );
+ ctg3 = cn_temp->nextInScaf->contigID;
+ setNextInScaf ( cn_temp, NULL );
+ bal_ctg3 = getTwinCtg ( ctg3 );
+ cn_ds = getCntBetween ( bal_ctg3, bal_tc );
+ setNextInScaf ( cn_ds, NULL );
+ setPrevInScaf ( cn_bal, 0 );
+ }
+
+ // ctg3 -> from_c -> to_c
+ // bal_ctg3 <- bal_fc <- bal_tc
+ if ( cn_bal->nextInScaf )
+ {
+ setPrevInScaf ( cn_bal->nextInScaf, 0 );
+ bal_ctg3 = cn_bal->nextInScaf->contigID;
+ setNextInScaf ( cn_bal, NULL );
+ ctg3 = getTwinCtg ( bal_ctg3 );
+ cn_us = getCntBetween ( ctg3, from_c );
+ setNextInScaf ( cn_us, NULL );
+ setPrevInScaf ( cn_temp, 0 );
+ }
+
+ return 1;
}
/*************************************************
@@ -1044,20 +1120,20 @@ Return:
*************************************************/
static boolean setConnectUsed ( unsigned int from_c, unsigned int to_c, char flag )
{
- CONNECT * cn_temp, *cn_bal;
- unsigned int bal_fc = getTwinCtg ( from_c );
- unsigned int bal_tc = getTwinCtg ( to_c );
- cn_temp = getCntBetween ( from_c, to_c );
- cn_bal = getCntBetween ( bal_tc, bal_fc );
-
- if ( !cn_temp || !cn_bal )
- {
- return 0;
- }
-
- cn_temp->used = flag;
- cn_bal->used = flag;
- return 1;
+ CONNECT *cn_temp, *cn_bal;
+ unsigned int bal_fc = getTwinCtg ( from_c );
+ unsigned int bal_tc = getTwinCtg ( to_c );
+ cn_temp = getCntBetween ( from_c, to_c );
+ cn_bal = getCntBetween ( bal_tc, bal_fc );
+
+ if ( !cn_temp || !cn_bal )
+ {
+ return 0;
+ }
+
+ cn_temp->used = flag;
+ cn_bal->used = flag;
+ return 1;
}
/*************************************************
@@ -1077,20 +1153,20 @@ Return:
*************************************************/
static int setConnectWP ( unsigned int from_c, unsigned int to_c, char flag )
{
- CONNECT * cn_temp, *cn_bal;
- unsigned int bal_fc = getTwinCtg ( from_c );
- unsigned int bal_tc = getTwinCtg ( to_c );
- cn_temp = getCntBetween ( from_c, to_c );
- cn_bal = getCntBetween ( bal_tc, bal_fc );
-
- if ( !cn_temp || !cn_bal )
- {
- return 0;
- }
-
- cn_temp->weakPoint = flag;
- cn_bal->weakPoint = flag;
- return 1;
+ CONNECT *cn_temp, *cn_bal;
+ unsigned int bal_fc = getTwinCtg ( from_c );
+ unsigned int bal_tc = getTwinCtg ( to_c );
+ cn_temp = getCntBetween ( from_c, to_c );
+ cn_bal = getCntBetween ( bal_tc, bal_fc );
+
+ if ( !cn_temp || !cn_bal )
+ {
+ return 0;
+ }
+
+ cn_temp->weakPoint = flag;
+ cn_bal->weakPoint = flag;
+ return 1;
}
/*************************************************
@@ -1112,32 +1188,34 @@ Return:
*************************************************/
static int setConnectDelete ( unsigned int from_c, unsigned int to_c, char flag, boolean cleanBinding )
{
- CONNECT * cn_temp, *cn_bal;
- unsigned int bal_fc = getTwinCtg ( from_c );
- unsigned int bal_tc = getTwinCtg ( to_c );
- cn_temp = getCntBetween ( from_c, to_c );
- cn_bal = getCntBetween ( bal_tc, bal_fc );
-
- if ( !cn_temp || !cn_bal )
- {
- return 0;
- }
-
- cn_temp->deleted = flag;
- cn_bal->deleted = flag;
-
- if ( !flag )
- { return 1; }
-
- if ( cleanBinding )
- {
- cn_temp->prevInScaf = 0;
- cn_temp->nextInScaf = NULL;
- cn_bal->prevInScaf = 0;
- cn_bal->nextInScaf = NULL;
- }
-
- return 1;
+ CONNECT *cn_temp, *cn_bal;
+ unsigned int bal_fc = getTwinCtg ( from_c );
+ unsigned int bal_tc = getTwinCtg ( to_c );
+ cn_temp = getCntBetween ( from_c, to_c );
+ cn_bal = getCntBetween ( bal_tc, bal_fc );
+
+ if ( !cn_temp || !cn_bal )
+ {
+ return 0;
+ }
+
+ cn_temp->deleted = flag;
+ cn_bal->deleted = flag;
+
+ if ( !flag )
+ {
+ return 1;
+ }
+
+ if ( cleanBinding )
+ {
+ cn_temp->prevInScaf = 0;
+ cn_temp->nextInScaf = NULL;
+ cn_bal->prevInScaf = 0;
+ cn_bal->nextInScaf = NULL;
+ }
+
+ return 1;
}
/*************************************************
@@ -1156,42 +1234,42 @@ Return:
*************************************************/
static void maskContig ( unsigned int ctg, boolean flag )
{
- unsigned int bal_ctg, ctg2, bal_ctg2;
- CONNECT * cn_temp;
- bal_ctg = getTwinCtg ( ctg );
- cn_temp = contig_array[ctg].downwardConnect;
-
- while ( cn_temp )
- {
- if ( cn_temp->mask || cn_temp->prevInScaf || cn_temp->nextInScaf || cn_temp->singleInScaf )
- {
- cn_temp = cn_temp->next;
- continue;
- }
-
- ctg2 = cn_temp->contigID;
- setConnectMask ( ctg, ctg2, flag );
- cn_temp = cn_temp->next;
- }
-
- // bal_ctg2 <- bal_ctg
- cn_temp = contig_array[bal_ctg].downwardConnect;
-
- while ( cn_temp )
- {
- if ( cn_temp->mask || cn_temp->prevInScaf || cn_temp->nextInScaf || cn_temp->singleInScaf )
- {
- cn_temp = cn_temp->next;
- continue;
- }
-
- bal_ctg2 = cn_temp->contigID;
- setConnectMask ( bal_ctg, bal_ctg2, flag );
- cn_temp = cn_temp->next;
- }
-
- contig_array[ctg].mask = flag;
- contig_array[bal_ctg].mask = flag;
+ unsigned int bal_ctg, ctg2, bal_ctg2;
+ CONNECT *cn_temp;
+ bal_ctg = getTwinCtg ( ctg );
+ cn_temp = contig_array[ctg].downwardConnect;
+
+ while ( cn_temp )
+ {
+ if ( cn_temp->mask || cn_temp->prevInScaf || cn_temp->nextInScaf || cn_temp->singleInScaf )
+ {
+ cn_temp = cn_temp->next;
+ continue;
+ }
+
+ ctg2 = cn_temp->contigID;
+ setConnectMask ( ctg, ctg2, flag );
+ cn_temp = cn_temp->next;
+ }
+
+ // bal_ctg2 <- bal_ctg
+ cn_temp = contig_array[bal_ctg].downwardConnect;
+
+ while ( cn_temp )
+ {
+ if ( cn_temp->mask || cn_temp->prevInScaf || cn_temp->nextInScaf || cn_temp->singleInScaf )
+ {
+ cn_temp = cn_temp->next;
+ continue;
+ }
+
+ bal_ctg2 = cn_temp->contigID;
+ setConnectMask ( bal_ctg, bal_ctg2, flag );
+ cn_temp = cn_temp->next;
+ }
+
+ contig_array[ctg].mask = flag;
+ contig_array[bal_ctg].mask = flag;
}
@@ -1212,45 +1290,51 @@ Return:
*************************************************/
static int maskPuzzle ( int num_connect, unsigned int contigLen )
{
- int in_num, out_num, flag = 0, puzzleCounter = 0;
- unsigned int i, bal_i;
- fprintf ( stderr, "Start to mask puzzles.\n" );
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contigLen && contig_array[i].length > contigLen )
- { break; }
-
- if ( contig_array[i].mask )
- { continue; }
-
- bal_i = getTwinCtg ( i );
- in_num = validConnect ( bal_i, NULL );
- out_num = validConnect ( i, NULL );
-
- if ( ( in_num > 1 || out_num > 1 ) && ( in_num + out_num >= num_connect ) )
- {
- flag++;
- maskContig ( i, 1 );
- }
-
- // upstream connection in scaffold
- in_num = validConnect ( bal_i, NULL );
- // downstream connection in scaffold
- out_num = validConnect ( i, NULL );
-
- if ( in_num > 1 || out_num > 1 )
- {
- puzzleCounter++;
- //debugging2(i);
- }
-
- if ( isSmallerThanTwin ( i ) )
- { i++; }
- }
-
- fprintf ( stderr, " Masked contigs %d\n Remained puzzles %d\n", flag, puzzleCounter );
- return flag;
+ int in_num, out_num, flag = 0, puzzleCounter = 0;
+ unsigned int i, bal_i;
+ fprintf ( stderr, "Start to mask puzzles.\n" );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contigLen && contig_array[i].length > contigLen )
+ {
+ break;
+ }
+
+ if ( contig_array[i].mask )
+ {
+ continue;
+ }
+
+ bal_i = getTwinCtg ( i );
+ in_num = validConnect ( bal_i, NULL );
+ out_num = validConnect ( i, NULL );
+
+ if ( ( in_num > 1 || out_num > 1 ) && ( in_num + out_num >= num_connect ) )
+ {
+ flag++;
+ maskContig ( i, 1 );
+ }
+
+ // upstream connection in scaffold
+ in_num = validConnect ( bal_i, NULL );
+ // downstream connection in scaffold
+ out_num = validConnect ( i, NULL );
+
+ if ( in_num > 1 || out_num > 1 )
+ {
+ puzzleCounter++;
+ //debugging2(i);
+ }
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ fprintf ( stderr, " Masked contigs %d\n Remained puzzles %d\n", flag, puzzleCounter );
+ return flag;
}
/*************************************************
@@ -1271,48 +1355,54 @@ Return:
*************************************************/
static void deleteWeakCnt ( int cut_off )
{
- unsigned int i;
- CONNECT * cn_temp1;
- int weaks = 0, counter = 0;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- cn_temp1 = contig_array[i].downwardConnect;
-
- while ( cn_temp1 )
- {
- if ( !cn_temp1->mask && !cn_temp1->deleted && !cn_temp1->nextInScaf
- && !cn_temp1->singleInScaf && !cn_temp1->prevInScaf )
- {
- counter++;
- }
-
- if ( cn_temp1->weak && cn_temp1->deleted && cn_temp1->weight >= cut_off )
- {
- cn_temp1->deleted = 0;
- cn_temp1->weak = 0;
- }
- else if ( !cn_temp1->deleted && cn_temp1->weight > 0 && cn_temp1->weight < cut_off
- && !cn_temp1->nextInScaf && !cn_temp1->prevInScaf )
- {
- cn_temp1->deleted = 1;
- cn_temp1->weak = 1;
-
- if ( cn_temp1->singleInScaf )
- { cn_temp1->singleInScaf = 0; }
-
- if ( !cn_temp1->mask )
- { weaks++; }
- }
-
- cn_temp1 = cn_temp1->next;
- }
- }
-
- if ( counter > 0 )
- { fprintf ( stderr, " Active connections %d\n Weak connections %d\n Weak ratio %.1f%%\n", counter, weaks, ( float ) weaks / counter * 100 ); }
-
- checkCircle();
+ unsigned int i;
+ CONNECT *cn_temp1;
+ int weaks = 0, counter = 0;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ cn_temp1 = contig_array[i].downwardConnect;
+
+ while ( cn_temp1 )
+ {
+ if ( !cn_temp1->mask && !cn_temp1->deleted && !cn_temp1->nextInScaf
+ && !cn_temp1->singleInScaf && !cn_temp1->prevInScaf )
+ {
+ counter++;
+ }
+
+ if ( cn_temp1->weak && cn_temp1->deleted && cn_temp1->weight >= cut_off )
+ {
+ cn_temp1->deleted = 0;
+ cn_temp1->weak = 0;
+ }
+ else if ( !cn_temp1->deleted && cn_temp1->weight > 0 && cn_temp1->weight < cut_off
+ && !cn_temp1->nextInScaf && !cn_temp1->prevInScaf )
+ {
+ cn_temp1->deleted = 1;
+ cn_temp1->weak = 1;
+
+ if ( cn_temp1->singleInScaf )
+ {
+ cn_temp1->singleInScaf = 0;
+ }
+
+ if ( !cn_temp1->mask )
+ {
+ weaks++;
+ }
+ }
+
+ cn_temp1 = cn_temp1->next;
+ }
+ }
+
+ if ( counter > 0 )
+ {
+ fprintf ( stderr, " Active connections %d\n Weak connections %d\n Weak ratio %.1f%%\n", counter, weaks, ( float ) weaks / counter * 100 );
+ }
+
+ checkCircle();
}
@@ -1340,165 +1430,177 @@ Return:
1 if connection path was found or connection was created.
0 otherwise.
*************************************************/
-static int linearC2C ( unsigned int starter, CONNECT * cnt2c1, unsigned int c2, int min_dis, int max_dis )
+static int linearC2C ( unsigned int starter, CONNECT *cnt2c1, unsigned int c2, int min_dis, int max_dis )
{
- int out_num, in_num;
- CONNECT * prevCNT, *cnt, *cn_temp;
- unsigned int c1, bal_c1, ctg, bal_c2;
- int len = 0;
- unsigned int bal_start = getTwinCtg ( starter );
- boolean excep;
- c1 = cnt2c1->contigID;
-
- if ( c1 == c2 )
- {
- fprintf ( stderr, "linearC2C: c1(%d) and c2(%d) are the same contig.\n", c1, c2 );
- return -1;
- }
-
- bal_c1 = getTwinCtg ( c1 );
- dsCtgCounter = 1;
- usCtgCounter = 0;
- downstreamCTG[dsCtgCounter++] = c1;
- bal_c2 = getTwinCtg ( c2 );
- upstreamCTG[usCtgCounter++] = bal_c2;
- // check if c1 is linearly connected to c2 by pe connections
- cnt = prevCNT = cnt2c1;
-
- while ( ( cnt = getNextContig ( c1, prevCNT, &excep ) ) != NULL )
- {
- c1 = cnt->contigID;
- len += cnt->gapLen + contig_array[c1].length;
-
- if ( c1 == c2 )
- {
- usCtgCounter--;
- return 1; //is interleaving.
- }
-
- if ( len > max_dis || c1 == starter || c1 == bal_start )
- { return 0; }
-
- downstreamCTG[dsCtgCounter++] = c1;
-
- if ( dsCtgCounter >= MAXCinBetween )
- {
- fprintf ( stderr, "%d downstream contigs, start at %d, max_dis %d, current dis %d\n"
- , dsCtgCounter, starter, max_dis, len );
- return 0;
- }
-
- prevCNT = cnt;
- }
-
- out_num = validConnect ( c1, NULL ); //new c1 should have no outgoing link.
-
- if ( out_num )
- { return 0; }
-
- //find the most upstream contig to c2
- cnt = prevCNT = NULL;
- ctg = bal_c2;
-
- while ( ( cnt = getNextContig ( ctg, prevCNT, &excep ) ) != NULL )
- {
- ctg = cnt->contigID;
- len += cnt->gapLen + contig_array[ctg].length;
-
- if ( len > max_dis || ctg == starter || ctg == bal_start )
- { return 0; }
-
- prevCNT = cnt;
- upstreamCTG[usCtgCounter++] = ctg;
-
- if ( usCtgCounter >= MAXCinBetween )
- {
- fprintf ( stderr, "%d upstream contigs, start at %d, max_dis %d, current dis %d\n"
- , usCtgCounter, starter, max_dis, len );
- return 0;
- }
- }
-
- if ( dsCtgCounter + usCtgCounter > MAXCinBetween )
- {
- fprintf ( stderr, "%d downstream and %d upstream contigs.\n", dsCtgCounter, usCtgCounter );
- return 0;
- }
-
- out_num = validConnect ( ctg, NULL ); //new c2 have no incoming link.
-
- if ( out_num )
- {
- return 0;
- }
-
- c2 = getTwinCtg ( ctg );
- min_dis -= len;
- max_dis -= len;
-
- if ( c1 == c2 || c1 == ctg || max_dis < 0 )
- { return 0; }
-
- usCtgCounter--;
- cn_temp = getCntBetween ( c1, c2 ); //have connection between new c1 and new c2
-
- if ( cn_temp )
- {
- setConnectMask ( c1, c2, 0 );
- setConnectDelete ( c1, c2, 0, 0 );
- return 1;
- }
-
- int oldsize = usCtgCounter;
-
- while ( getCntBetween ( c2, c1 ) && usCtgCounter > 1 )
- {
- usCtgCounter--;
- c2 = getTwinCtg ( upstreamCTG[usCtgCounter] );
- }
-
- if ( usCtgCounter != oldsize )
- {
- unsigned int prev_c2 = upstreamCTG[usCtgCounter + 1];
- unsigned int bal_prev_c2 = getTwinCtg ( prev_c2 );
- setConnectMask ( bal_prev_c2, c2, 1 );
- setConnectMask ( bal_prev_c2, c2, 0 );
- int i = usCtgCounter + 1;
-
- for ( ; i <= oldsize; i++ )
- {
- contig_array[upstreamCTG[i]].from_vt = prev_c2;
- contig_array[getTwinCtg ( upstreamCTG[i] )].to_vt = bal_prev_c2;
- }
-
- if ( ( cn_temp = getCntBetween ( c1, c2 ) ) != NULL )
- {
- setConnectMask ( c1, c2, 0 );
- setConnectDelete ( c1, c2, 0, 0 );
- return 1;
- }
- }
-
- len = ( min_dis + max_dis ) / 2 >= 0 ? ( min_dis + max_dis ) / 2 : 0;
- cn_temp = allocateCN ( c2, len );
-
- if ( cntLookupTable )
- { putCnt2LookupTable ( c1, cn_temp ); }
-
- cn_temp->weight = 0; // special connect from the original graph
- cn_temp->next = contig_array[c1].downwardConnect;
- contig_array[c1].downwardConnect = cn_temp;
- bal_c1 = getTwinCtg ( c1 );
- bal_c2 = getTwinCtg ( c2 );
- cn_temp = allocateCN ( bal_c1, len );
-
- if ( cntLookupTable )
- { putCnt2LookupTable ( bal_c2, cn_temp ); }
-
- cn_temp->weight = 0; // special connect from the original graph
- cn_temp->next = contig_array[bal_c2].downwardConnect;
- contig_array[bal_c2].downwardConnect = cn_temp;
- return 1;
+ int out_num, in_num;
+ CONNECT *prevCNT, *cnt, *cn_temp;
+ unsigned int c1, bal_c1, ctg, bal_c2;
+ int len = 0;
+ unsigned int bal_start = getTwinCtg ( starter );
+ boolean excep;
+ c1 = cnt2c1->contigID;
+
+ if ( c1 == c2 )
+ {
+ fprintf ( stderr, "linearC2C: c1(%d) and c2(%d) are the same contig.\n", c1, c2 );
+ return -1;
+ }
+
+ bal_c1 = getTwinCtg ( c1 );
+ dsCtgCounter = 1;
+ usCtgCounter = 0;
+ downstreamCTG[dsCtgCounter++] = c1;
+ bal_c2 = getTwinCtg ( c2 );
+ upstreamCTG[usCtgCounter++] = bal_c2;
+ // check if c1 is linearly connected to c2 by pe connections
+ cnt = prevCNT = cnt2c1;
+
+ while ( ( cnt = getNextContig ( c1, prevCNT, &excep ) ) != NULL )
+ {
+ c1 = cnt->contigID;
+ len += cnt->gapLen + contig_array[c1].length;
+
+ if ( c1 == c2 )
+ {
+ usCtgCounter--;
+ return 1; //is interleaving.
+ }
+
+ if ( len > max_dis || c1 == starter || c1 == bal_start )
+ {
+ return 0;
+ }
+
+ downstreamCTG[dsCtgCounter++] = c1;
+
+ if ( dsCtgCounter >= MAXCinBetween )
+ {
+ fprintf ( stderr, "%d downstream contigs, start at %d, max_dis %d, current dis %d\n"
+ , dsCtgCounter, starter, max_dis, len );
+ return 0;
+ }
+
+ prevCNT = cnt;
+ }
+
+ out_num = validConnect ( c1, NULL ); //new c1 should have no outgoing link.
+
+ if ( out_num )
+ {
+ return 0;
+ }
+
+ //find the most upstream contig to c2
+ cnt = prevCNT = NULL;
+ ctg = bal_c2;
+
+ while ( ( cnt = getNextContig ( ctg, prevCNT, &excep ) ) != NULL )
+ {
+ ctg = cnt->contigID;
+ len += cnt->gapLen + contig_array[ctg].length;
+
+ if ( len > max_dis || ctg == starter || ctg == bal_start )
+ {
+ return 0;
+ }
+
+ prevCNT = cnt;
+ upstreamCTG[usCtgCounter++] = ctg;
+
+ if ( usCtgCounter >= MAXCinBetween )
+ {
+ fprintf ( stderr, "%d upstream contigs, start at %d, max_dis %d, current dis %d\n"
+ , usCtgCounter, starter, max_dis, len );
+ return 0;
+ }
+ }
+
+ if ( dsCtgCounter + usCtgCounter > MAXCinBetween )
+ {
+ fprintf ( stderr, "%d downstream and %d upstream contigs.\n", dsCtgCounter, usCtgCounter );
+ return 0;
+ }
+
+ out_num = validConnect ( ctg, NULL ); //new c2 have no incoming link.
+
+ if ( out_num )
+ {
+ return 0;
+ }
+
+ c2 = getTwinCtg ( ctg );
+ min_dis -= len;
+ max_dis -= len;
+
+ if ( c1 == c2 || c1 == ctg || max_dis < 0 )
+ {
+ return 0;
+ }
+
+ usCtgCounter--;
+ cn_temp = getCntBetween ( c1, c2 ); //have connection between new c1 and new c2
+
+ if ( cn_temp )
+ {
+ setConnectMask ( c1, c2, 0 );
+ setConnectDelete ( c1, c2, 0, 0 );
+ return 1;
+ }
+
+ int oldsize = usCtgCounter;
+
+ while ( getCntBetween ( c2, c1 ) && usCtgCounter > 1 )
+ {
+ usCtgCounter--;
+ c2 = getTwinCtg ( upstreamCTG[usCtgCounter] );
+ }
+
+ if ( usCtgCounter != oldsize )
+ {
+ unsigned int prev_c2 = upstreamCTG[usCtgCounter + 1];
+ unsigned int bal_prev_c2 = getTwinCtg ( prev_c2 );
+ setConnectMask ( bal_prev_c2, c2, 1 );
+ setConnectMask ( bal_prev_c2, c2, 0 );
+ int i = usCtgCounter + 1;
+
+ for ( ; i <= oldsize; i++ )
+ {
+ contig_array[upstreamCTG[i]].from_vt = prev_c2;
+ contig_array[getTwinCtg ( upstreamCTG[i] )].to_vt = bal_prev_c2;
+ }
+
+ if ( ( cn_temp = getCntBetween ( c1, c2 ) ) != NULL )
+ {
+ setConnectMask ( c1, c2, 0 );
+ setConnectDelete ( c1, c2, 0, 0 );
+ return 1;
+ }
+ }
+
+ len = ( min_dis + max_dis ) / 2 >= 0 ? ( min_dis + max_dis ) / 2 : 0;
+ cn_temp = allocateCN ( c2, len );
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( c1, cn_temp );
+ }
+
+ cn_temp->weight = 0; // special connect from the original graph
+ cn_temp->next = contig_array[c1].downwardConnect;
+ contig_array[c1].downwardConnect = cn_temp;
+ bal_c1 = getTwinCtg ( c1 );
+ bal_c2 = getTwinCtg ( c2 );
+ cn_temp = allocateCN ( bal_c1, len );
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( bal_c2, cn_temp );
+ }
+
+ cn_temp->weight = 0; // special connect from the original graph
+ cn_temp->next = contig_array[bal_c2].downwardConnect;
+ contig_array[bal_c2].downwardConnect = cn_temp;
+ return 1;
}
@@ -1516,17 +1618,19 @@ Return:
*************************************************/
static void catUsDsContig()
{
- int i;
+ int i;
- for ( i = 0; i < dsCtgCounter; i++ )
- { * ( unsigned int * ) darrayPut ( solidArray, i ) = downstreamCTG[i]; }
+ for ( i = 0; i < dsCtgCounter; i++ )
+ {
+ * ( unsigned int * ) darrayPut ( solidArray, i ) = downstreamCTG[i];
+ }
- for ( i = usCtgCounter; i >= 0; i-- )
- {
- * ( unsigned int * ) darrayPut ( solidArray, dsCtgCounter++ ) = getTwinCtg ( upstreamCTG[i] );
- }
+ for ( i = usCtgCounter; i >= 0; i-- )
+ {
+ * ( unsigned int * ) darrayPut ( solidArray, dsCtgCounter++ ) = getTwinCtg ( upstreamCTG[i] );
+ }
- solidCounter = dsCtgCounter;
+ solidCounter = dsCtgCounter;
}
@@ -1545,89 +1649,99 @@ Return:
*************************************************/
static void consolidate()
{
- int i, j;
- CONNECT * prevCNT = NULL;
- CONNECT * cnt;
- unsigned int to_ctg;
- unsigned int from_ctg = * ( unsigned int * ) darrayGet ( solidArray, 0 );
-
- for ( i = 1; i < solidCounter; i++ )
- {
- to_ctg = * ( unsigned int * ) darrayGet ( solidArray, i );
- cnt = checkConnect ( from_ctg, to_ctg );
-
- if ( !cnt )
- {
- fprintf ( stderr, "consolidate A: no connect from %d to %d\n",
- from_ctg, to_ctg );
-
- for ( j = 0; j < solidCounter; j++ )
- { fprintf ( stderr, "%d-->", * ( unsigned int * ) darrayGet ( solidArray, j ) ); }
-
- fprintf ( stderr, "\n" );
- return;
- }
-
- cnt->singleInScaf = solidCounter == 2 ? 1 : 0;
-
- if ( prevCNT )
- {
- setNextInScaf ( prevCNT, cnt );
- setPrevInScaf ( cnt, 1 );
- }
-
- prevCNT = cnt;
- from_ctg = to_ctg;
- }
-
- //the reverse complementary path
- from_ctg = getTwinCtg ( * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 1 ) );
- prevCNT = NULL;
-
- for ( i = solidCounter - 2; i >= 0; i-- )
- {
- to_ctg = getTwinCtg ( * ( unsigned int * ) darrayGet ( solidArray, i ) );
- cnt = checkConnect ( from_ctg, to_ctg );
-
- if ( !cnt )
- {
- fprintf ( stderr, "consolidate B: no connect from %d to %d\n", from_ctg, to_ctg );
- return;
- }
-
- cnt->singleInScaf = solidCounter == 2 ? 1 : 0;
-
- if ( prevCNT )
- {
- setNextInScaf ( prevCNT, cnt );
- setPrevInScaf ( cnt, 1 );
- }
-
- prevCNT = cnt;
- from_ctg = to_ctg;
- }
+ int i, j;
+ CONNECT *prevCNT = NULL;
+ CONNECT *cnt;
+ unsigned int to_ctg;
+ unsigned int from_ctg = * ( unsigned int * ) darrayGet ( solidArray, 0 );
+
+ for ( i = 1; i < solidCounter; i++ )
+ {
+ to_ctg = * ( unsigned int * ) darrayGet ( solidArray, i );
+ cnt = checkConnect ( from_ctg, to_ctg );
+
+ if ( !cnt )
+ {
+ fprintf ( stderr, "consolidate A: no connect from %d to %d\n",
+ from_ctg, to_ctg );
+
+ for ( j = 0; j < solidCounter; j++ )
+ {
+ fprintf ( stderr, "%d-->", * ( unsigned int * ) darrayGet ( solidArray, j ) );
+ }
+
+ fprintf ( stderr, "\n" );
+ return;
+ }
+
+ cnt->singleInScaf = solidCounter == 2 ? 1 : 0;
+
+ if ( prevCNT )
+ {
+ setNextInScaf ( prevCNT, cnt );
+ setPrevInScaf ( cnt, 1 );
+ }
+
+ prevCNT = cnt;
+ from_ctg = to_ctg;
+ }
+
+ //the reverse complementary path
+ from_ctg = getTwinCtg ( * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 1 ) );
+ prevCNT = NULL;
+
+ for ( i = solidCounter - 2; i >= 0; i-- )
+ {
+ to_ctg = getTwinCtg ( * ( unsigned int * ) darrayGet ( solidArray, i ) );
+ cnt = checkConnect ( from_ctg, to_ctg );
+
+ if ( !cnt )
+ {
+ fprintf ( stderr, "consolidate B: no connect from %d to %d\n", from_ctg, to_ctg );
+ return;
+ }
+
+ cnt->singleInScaf = solidCounter == 2 ? 1 : 0;
+
+ if ( prevCNT )
+ {
+ setNextInScaf ( prevCNT, cnt );
+ setPrevInScaf ( cnt, 1 );
+ }
+
+ prevCNT = cnt;
+ from_ctg = to_ctg;
+ }
}
static void debugging1 ( unsigned int ctg1, unsigned int ctg2 )
{
- CONNECT * cn1;
- cn1 = getCntBetween ( ctg1, ctg2 );
-
- if ( cn1 )
- {
- fprintf ( stderr, "(%d,%d) mask %d deleted %d w %d,singleInScaf %d\n",
- ctg1, ctg2, cn1->mask, cn1->deleted, cn1->weight, cn1->singleInScaf );
-
- if ( cn1->nextInScaf )
- { fprintf ( stderr, "%d->%d->%d\n", ctg1, ctg2, cn1->nextInScaf->contigID ); }
-
- if ( cn1->prevInScaf )
- { fprintf ( stderr, "*->%d->%d\n", ctg1, ctg2 ); }
- else if ( !cn1->nextInScaf )
- { fprintf ( stderr, "NULL->%d->%d->NULL\n", ctg1, ctg2 ); }
- }
- else
- { fprintf ( stderr, "%d -X- %d\n", ctg1, ctg2 ); }
+ CONNECT *cn1;
+ cn1 = getCntBetween ( ctg1, ctg2 );
+
+ if ( cn1 )
+ {
+ fprintf ( stderr, "(%d,%d) mask %d deleted %d w %d,singleInScaf %d\n",
+ ctg1, ctg2, cn1->mask, cn1->deleted, cn1->weight, cn1->singleInScaf );
+
+ if ( cn1->nextInScaf )
+ {
+ fprintf ( stderr, "%d->%d->%d\n", ctg1, ctg2, cn1->nextInScaf->contigID );
+ }
+
+ if ( cn1->prevInScaf )
+ {
+ fprintf ( stderr, "*->%d->%d\n", ctg1, ctg2 );
+ }
+ else if ( !cn1->nextInScaf )
+ {
+ fprintf ( stderr, "NULL->%d->%d->NULL\n", ctg1, ctg2 );
+ }
+ }
+ else
+ {
+ fprintf ( stderr, "%d -X- %d\n", ctg1, ctg2 );
+ }
}
/*************************************************
@@ -1650,201 +1764,231 @@ Return:
*************************************************/
static void removeTransitive()
{
- unsigned int i, bal_ctg;
- int flag = 1, out_num, in_num, count, min, max, linear;
- CONNECT * cn_temp, *cn1 = NULL, *cn2 = NULL;
- int multi_out = 0, single_out = 0, two_out = 0, may_transitive = 0, not_transitive = 0, cycle_num = 0, mask_ctg = 0, no_out = 0;
- fprintf ( stderr, "Start to remove transitive connection.\n" );
-
- while ( flag )
- {
- flag = 0;
- two_out = 0;
- not_transitive = 0;
- may_transitive = 0;
- cycle_num++;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].mask )
- {
- if ( cycle_num == 1 )
- { mask_ctg++; }
-
- continue;
- }
-
- out_num = validConnect ( i, NULL );
-
- if ( out_num != 2 )
- {
- if ( out_num == 1 && cycle_num == 1 )
- { single_out++; }
-
- if ( out_num > 2 && cycle_num == 1 )
- { multi_out++; }
-
- if ( out_num == 0 && cycle_num == 1 )
- { no_out++; }
-
- continue;
- }
-
- two_out++;
- cn_temp = contig_array[i].downwardConnect;
- count = 0;
-
- while ( cn_temp )
- {
- if ( cn_temp->deleted || cn_temp->mask )
- {
- cn_temp = cn_temp->next;
- continue;
- }
-
- count++;
-
- if ( count == 1 )
- { cn1 = cn_temp; }
- else if ( count == 2 )
- {
- cn2 = cn_temp;
- }
- else
- { break; }
-
- cn_temp = cn_temp->next;
- }
-
- if ( count > 2 )
- {
- fprintf ( stderr, "%d valid connections from ctg %d\n", count, i );
- continue;
- }
-
- if ( cn1->gapLen > cn2->gapLen )
- {
- cn_temp = cn1;
- cn1 = cn2;
- cn2 = cn_temp;
- } //make sure cn1 is closer to contig i than cn2
-
- if ( cn1->prevInScaf && cn2->prevInScaf )
- { continue; }
-
- bal_ctg = getTwinCtg ( cn2->contigID );
- in_num = validConnect ( bal_ctg, NULL );
-
- if ( in_num > 2 )
- { continue; }
-
- int bal_c1 = getTwinCtg ( cn1->contigID );
- in_num = validConnect ( bal_c1, NULL );
-
- if ( in_num > 1 )
- { continue; }
-
- min = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length - ins_size_var / 2;
- max = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length + ins_size_var / 2;
-
- if ( max < 0 )
- { continue; }
-
- may_transitive++;
- //temprarily delete cn2
- setConnectDelete ( i, cn2->contigID, 1, 0 );
- int oldc2 = cn2->contigID;
- linear = linearC2C ( i, cn1, cn2->contigID, min, max );
-
- if ( linear != 1 )
- {
- not_transitive++;
- setConnectDelete ( i, cn2->contigID, 0, 0 );
- continue;
- }
- else
- {
- downstreamCTG[0] = i;
- catUsDsContig();
-
- if ( !checkSimple ( solidArray, solidCounter ) )
- { continue; }
-
- cn1 = getCntBetween ( * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 2 ),
- * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 1 ) );
-
- if ( cn1 && cn1->nextInScaf && cn2->nextInScaf )
- {
- setConnectDelete ( i, cn2->contigID, 0, 0 );
- continue;
- }
-
- consolidate();
-
- if ( cn2->prevInScaf )
- substitueDSinScaf ( cn2, * ( unsigned int * ) darrayGet ( solidArray, 0 ),
- * ( unsigned int * ) darrayGet ( solidArray, 1 ) );
-
- if ( cn2->nextInScaf )
- { substitueUSinScaf ( cn2, * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 2 ) ); }
-
- flag++;
- }
- }
-
- if ( cycle_num == 1 )
- {
- fprintf ( stderr, "Total contigs %u\n", num_ctg );
- fprintf ( stderr, "Masked contigs %d\n", mask_ctg );
- fprintf ( stderr, "Remained contigs %u\n", num_ctg - mask_ctg );
- fprintf ( stderr, "None-outgoing-connection contigs %d", no_out );
-
- if ( num_ctg - mask_ctg > 0 )
- {
- fprintf ( stderr, " (%1f%%)", ( float ) no_out / ( num_ctg - mask_ctg ) * 100 );
- }
-
- fprintf ( stderr, "\nSingle-outgoing-connection contigs %d\n", single_out );
- fprintf ( stderr, "Multi-outgoing-connection contigs %d\n", multi_out );
- }
-
- fprintf ( stderr, "Cycle %d\n Two-outgoing-connection contigs %d\n Potential transitive connections %d\n Transitive connections %d\n", cycle_num, two_out, may_transitive, flag );
-
- if ( two_out > 0 )
- {
- fprintf ( stderr, " Transitive ratio %.1f%%\n", ( float ) flag / two_out * 100 );
- }
-
- if ( flag == 0 )
- { break; }
- }
+ unsigned int i, bal_ctg;
+ int flag = 1, out_num, in_num, count, min, max, linear;
+ CONNECT *cn_temp, *cn1 = NULL, *cn2 = NULL;
+ int multi_out = 0, single_out = 0, two_out = 0, may_transitive = 0, not_transitive = 0, cycle_num = 0, mask_ctg = 0, no_out = 0;
+ fprintf ( stderr, "Start to remove transitive connection.\n" );
+
+ while ( flag )
+ {
+ flag = 0;
+ two_out = 0;
+ not_transitive = 0;
+ may_transitive = 0;
+ cycle_num++;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].mask )
+ {
+ if ( cycle_num == 1 )
+ {
+ mask_ctg++;
+ }
+
+ continue;
+ }
+
+ out_num = validConnect ( i, NULL );
+
+ if ( out_num != 2 )
+ {
+ if ( out_num == 1 && cycle_num == 1 )
+ {
+ single_out++;
+ }
+
+ if ( out_num > 2 && cycle_num == 1 )
+ {
+ multi_out++;
+ }
+
+ if ( out_num == 0 && cycle_num == 1 )
+ {
+ no_out++;
+ }
+
+ continue;
+ }
+
+ two_out++;
+ cn_temp = contig_array[i].downwardConnect;
+ count = 0;
+
+ while ( cn_temp )
+ {
+ if ( cn_temp->deleted || cn_temp->mask )
+ {
+ cn_temp = cn_temp->next;
+ continue;
+ }
+
+ count++;
+
+ if ( count == 1 )
+ {
+ cn1 = cn_temp;
+ }
+ else if ( count == 2 )
+ {
+ cn2 = cn_temp;
+ }
+ else
+ {
+ break;
+ }
+
+ cn_temp = cn_temp->next;
+ }
+
+ if ( count > 2 )
+ {
+ fprintf ( stderr, "%d valid connections from ctg %d\n", count, i );
+ continue;
+ }
+
+ if ( cn1->gapLen > cn2->gapLen )
+ {
+ cn_temp = cn1;
+ cn1 = cn2;
+ cn2 = cn_temp;
+ } //make sure cn1 is closer to contig i than cn2
+
+ if ( cn1->prevInScaf && cn2->prevInScaf )
+ {
+ continue;
+ }
+
+ bal_ctg = getTwinCtg ( cn2->contigID );
+ in_num = validConnect ( bal_ctg, NULL );
+
+ if ( in_num > 2 )
+ {
+ continue;
+ }
+
+ int bal_c1 = getTwinCtg ( cn1->contigID );
+ in_num = validConnect ( bal_c1, NULL );
+
+ if ( in_num > 1 )
+ {
+ continue;
+ }
+
+ min = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length - ins_size_var / 2;
+ max = cn2->gapLen - cn1->gapLen - contig_array[cn1->contigID].length + ins_size_var / 2;
+
+ if ( max < 0 )
+ {
+ continue;
+ }
+
+ may_transitive++;
+ //temprarily delete cn2
+ setConnectDelete ( i, cn2->contigID, 1, 0 );
+ int oldc2 = cn2->contigID;
+ linear = linearC2C ( i, cn1, cn2->contigID, min, max );
+
+ if ( linear != 1 )
+ {
+ not_transitive++;
+ setConnectDelete ( i, cn2->contigID, 0, 0 );
+ continue;
+ }
+ else
+ {
+ downstreamCTG[0] = i;
+ catUsDsContig();
+
+ if ( !checkSimple ( solidArray, solidCounter ) )
+ {
+ continue;
+ }
+
+ cn1 = getCntBetween ( * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 2 ),
+ * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 1 ) );
+
+ if ( cn1 && cn1->nextInScaf && cn2->nextInScaf )
+ {
+ setConnectDelete ( i, cn2->contigID, 0, 0 );
+ continue;
+ }
+
+ consolidate();
+
+ if ( cn2->prevInScaf )
+ substitueDSinScaf ( cn2, * ( unsigned int * ) darrayGet ( solidArray, 0 ),
+ * ( unsigned int * ) darrayGet ( solidArray, 1 ) );
+
+ if ( cn2->nextInScaf )
+ {
+ substitueUSinScaf ( cn2, * ( unsigned int * ) darrayGet ( solidArray, solidCounter - 2 ) );
+ }
+
+ flag++;
+ }
+ }
+
+ if ( cycle_num == 1 )
+ {
+ fprintf ( stderr, "Total contigs %u\n", num_ctg );
+ fprintf ( stderr, "Masked contigs %d\n", mask_ctg );
+ fprintf ( stderr, "Remained contigs %u\n", num_ctg - mask_ctg );
+ fprintf ( stderr, "None-outgoing-connection contigs %d", no_out );
+
+ if ( num_ctg - mask_ctg > 0 )
+ {
+ fprintf ( stderr, " (%1f%%)", ( float ) no_out / ( num_ctg - mask_ctg ) * 100 );
+ }
+
+ fprintf ( stderr, "\nSingle-outgoing-connection contigs %d\n", single_out );
+ fprintf ( stderr, "Multi-outgoing-connection contigs %d\n", multi_out );
+ }
+
+ fprintf ( stderr, "Cycle %d\n Two-outgoing-connection contigs %d\n Potential transitive connections %d\n Transitive connections %d\n", cycle_num, two_out, may_transitive, flag );
+
+ if ( two_out > 0 )
+ {
+ fprintf ( stderr, " Transitive ratio %.1f%%\n", ( float ) flag / two_out * 100 );
+ }
+
+ if ( flag == 0 )
+ {
+ break;
+ }
+ }
}
static void debugging2 ( unsigned int ctg )
{
- if ( ctg > num_ctg )
- {
- return;
- }
-
- CONNECT * cn1 = contig_array[ctg].downwardConnect;
-
- while ( cn1 )
- {
- if ( cn1->nextInScaf )
- { fprintf ( stderr, "with nextInScaf %u,", cn1->nextInScaf->contigID ); }
-
- if ( cn1->prevInScaf )
- { fprintf ( stderr, "with prevInScaf," ); }
-
- fprintf ( stderr, "%u >> %u, weight %d, gapLen %d, mask %d deleted %d, inherit %d, singleInScaf %d, bySmall %d\n",
- ctg, cn1->contigID, cn1->weight, cn1->gapLen, cn1->mask, cn1->deleted, cn1->inherit, cn1->singleInScaf, cn1->bySmall );
- cn1 = cn1->next;
- }
+ if ( ctg > num_ctg )
+ {
+ return;
+ }
+
+ CONNECT *cn1 = contig_array[ctg].downwardConnect;
+
+ while ( cn1 )
+ {
+ if ( cn1->nextInScaf )
+ {
+ fprintf ( stderr, "with nextInScaf %u,", cn1->nextInScaf->contigID );
+ }
+
+ if ( cn1->prevInScaf )
+ {
+ fprintf ( stderr, "with prevInScaf," );
+ }
+
+ fprintf ( stderr, "%u >> %u, weight %d, gapLen %d, mask %d deleted %d, inherit %d, singleInScaf %d, bySmall %d\n",
+ ctg, cn1->contigID, cn1->weight, cn1->gapLen, cn1->mask, cn1->deleted, cn1->inherit, cn1->singleInScaf, cn1->bySmall );
+ cn1 = cn1->next;
+ }
}
static void debugging()
{
- // debugging1(13298356, 13245956);
+ // debugging1(13298356, 13245956);
}
/*************************************************
@@ -1864,10 +2008,10 @@ Return:
*************************************************/
static void simplifyCnt()
{
- removeTransitive();
- debugging();
- general_linearization ( 1 );
- debugging();
+ removeTransitive();
+ debugging();
+ general_linearization ( 1 );
+ debugging();
}
/*************************************************
@@ -1885,13 +2029,15 @@ Return:
*************************************************/
static int getIndexInArray ( unsigned int node )
{
- int index;
+ int index;
- for ( index = 0; index < nodeCounter; index++ )
- if ( nodesInSub[index] == node )
- { return index; }
+ for ( index = 0; index < nodeCounter; index++ )
+ if ( nodesInSub[index] == node )
+ {
+ return index;
+ }
- return -1;
+ return -1;
}
/*************************************************
@@ -1911,22 +2057,24 @@ Return:
1 if operation succeeded.
-1 if index was larger than allowed maximum sub-graph size.
*************************************************/
-static boolean putNodeIntoSubgraph ( FibHeap * heap, int distance, unsigned int node, int index )
+static boolean putNodeIntoSubgraph ( FibHeap *heap, int distance, unsigned int node, int index )
{
- int pos = getIndexInArray ( node );
-
- if ( pos > 0 )
- {
- return 0;
- }
-
- if ( index >= MaxNodeInSub )
- { return -1; }
-
- insertNodeIntoHeap ( heap, distance, node );
- nodesInSub[index] = node;
- nodeDistance[index] = distance;
- return 1;
+ int pos = getIndexInArray ( node );
+
+ if ( pos > 0 )
+ {
+ return 0;
+ }
+
+ if ( index >= MaxNodeInSub )
+ {
+ return -1;
+ }
+
+ insertNodeIntoHeap ( heap, distance, node );
+ nodesInSub[index] = node;
+ nodeDistance[index] = distance;
+ return 1;
}
/*************************************************
@@ -1946,35 +2094,39 @@ Output:
Return:
0 if operation of putting contig into sub-graph failed.
*************************************************/
-static boolean putChainIntoSubgraph ( FibHeap * heap, int distance, unsigned int node, int * index, CONNECT * prevC )
+static boolean putChainIntoSubgraph ( FibHeap *heap, int distance, unsigned int node, int *index, CONNECT *prevC )
{
- unsigned int ctg = node;
- CONNECT * nextCnt;
- boolean excep, flag;
- int counter = *index;
-
- while ( 1 )
- {
- nextCnt = getNextContig ( ctg, prevC, &excep );
-
- if ( excep || !nextCnt )
- {
- *index = counter;
- return 1;
- }
-
- ctg = nextCnt->contigID;
- distance += nextCnt->gapLen + contig_array[ctg].length;
- flag = putNodeIntoSubgraph ( heap, distance, ctg, counter );
-
- if ( flag < 0 )
- { return 0; }
-
- if ( flag > 0 )
- { counter++; }
-
- prevC = nextCnt;
- }
+ unsigned int ctg = node;
+ CONNECT *nextCnt;
+ boolean excep, flag;
+ int counter = *index;
+
+ while ( 1 )
+ {
+ nextCnt = getNextContig ( ctg, prevC, &excep );
+
+ if ( excep || !nextCnt )
+ {
+ *index = counter;
+ return 1;
+ }
+
+ ctg = nextCnt->contigID;
+ distance += nextCnt->gapLen + contig_array[ctg].length;
+ flag = putNodeIntoSubgraph ( heap, distance, ctg, counter );
+
+ if ( flag < 0 )
+ {
+ return 0;
+ }
+
+ if ( flag > 0 )
+ {
+ counter++;
+ }
+
+ prevC = nextCnt;
+ }
}
/*************************************************
@@ -1994,61 +2146,65 @@ Return:
*************************************************/
static boolean checkUnique ( unsigned int node, double tolerance )
{
- CONNECT * ite_cnt;
- unsigned int currNode;
- int distance;
- int popCounter = 0;
- boolean flag;
- currNode = node;
- FibHeap * heap = newFibHeap();
- putNodeIntoSubgraph ( heap, 0, currNode, 0 );
- nodeCounter = 1;
- ite_cnt = contig_array[currNode].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- currNode = ite_cnt->contigID;
- distance = ite_cnt->gapLen + contig_array[currNode].length;
- flag = putNodeIntoSubgraph ( heap, distance, currNode, nodeCounter );
-
- if ( flag < 0 )
- {
- destroyHeap ( heap );
- return 0;
- }
-
- if ( flag > 0 )
- { nodeCounter++; }
-
- flag = putChainIntoSubgraph ( heap, distance, currNode, &nodeCounter, ite_cnt );
-
- if ( !flag )
- {
- destroyHeap ( heap );
- return 0;
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- if ( nodeCounter <= 2 ) // no more than 2 valid connections
- {
- destroyHeap ( heap );
- return 1;
- }
-
- while ( ( currNode = removeNextNodeFromHeap ( heap ) ) != 0 )
- { nodesInSubInOrder[popCounter++] = currNode; }
-
- destroyHeap ( heap );
- flag = checkOverlapInBetween ( tolerance );
- return flag;
+ CONNECT *ite_cnt;
+ unsigned int currNode;
+ int distance;
+ int popCounter = 0;
+ boolean flag;
+ currNode = node;
+ FibHeap *heap = newFibHeap();
+ putNodeIntoSubgraph ( heap, 0, currNode, 0 );
+ nodeCounter = 1;
+ ite_cnt = contig_array[currNode].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ currNode = ite_cnt->contigID;
+ distance = ite_cnt->gapLen + contig_array[currNode].length;
+ flag = putNodeIntoSubgraph ( heap, distance, currNode, nodeCounter );
+
+ if ( flag < 0 )
+ {
+ destroyHeap ( heap );
+ return 0;
+ }
+
+ if ( flag > 0 )
+ {
+ nodeCounter++;
+ }
+
+ flag = putChainIntoSubgraph ( heap, distance, currNode, &nodeCounter, ite_cnt );
+
+ if ( !flag )
+ {
+ destroyHeap ( heap );
+ return 0;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ if ( nodeCounter <= 2 ) // no more than 2 valid connections
+ {
+ destroyHeap ( heap );
+ return 1;
+ }
+
+ while ( ( currNode = removeNextNodeFromHeap ( heap ) ) != 0 )
+ {
+ nodesInSubInOrder[popCounter++] = currNode;
+ }
+
+ destroyHeap ( heap );
+ flag = checkOverlapInBetween ( tolerance );
+ return flag;
}
/*************************************************
@@ -2065,62 +2221,80 @@ Return:
*************************************************/
static void maskRepeat()
{
- int in_num, out_num, flagA, flagB;
- int counter = 0;
- int puzzleCounter = 0;
- unsigned int i, bal_i;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].mask )
- { continue; }
-
- bal_i = getTwinCtg ( i );
- in_num = validConnect ( bal_i, NULL );
- out_num = validConnect ( i, NULL );
-
- if ( in_num > 1 || out_num > 1 )
- { puzzleCounter++; }
- else
- {
- if ( isSmallerThanTwin ( i ) )
- { i++; }
-
- continue;
- }
-
- if ( contig_array[i].cvg > 1.4 * cvgAvg )
- {
- counter++;
- maskContig ( i, 1 );
-
- if ( isSmallerThanTwin ( i ) )
- { i++; }
-
- continue;
- }
-
- if ( in_num > 1 )
- { flagA = checkUnique ( bal_i, OverlapPercent ); }
- else
- { flagA = 1; }
-
- if ( out_num > 1 )
- { flagB = checkUnique ( i, OverlapPercent ); }
- else
- { flagB = 1; }
-
- if ( !flagA || !flagB )
- {
- counter++;
- maskContig ( i, 1 );
- }
-
- if ( isSmallerThanTwin ( i ) )
- { i++; }
- }
-
- fprintf ( stderr, "Mask repeats:\n Puzzles %d\n Masked contigs %d\n", puzzleCounter, counter );
+ int in_num, out_num, flagA, flagB;
+ int counter = 0;
+ int puzzleCounter = 0;
+ unsigned int i, bal_i;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].mask )
+ {
+ continue;
+ }
+
+ bal_i = getTwinCtg ( i );
+ in_num = validConnect ( bal_i, NULL );
+ out_num = validConnect ( i, NULL );
+
+ if ( in_num > 1 || out_num > 1 )
+ {
+ puzzleCounter++;
+ }
+ else
+ {
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+
+ continue;
+ }
+
+ if ( contig_array[i].cvg > 1.4 * cvgAvg )
+ {
+ counter++;
+ maskContig ( i, 1 );
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+
+ continue;
+ }
+
+ if ( in_num > 1 )
+ {
+ flagA = checkUnique ( bal_i, OverlapPercent );
+ }
+ else
+ {
+ flagA = 1;
+ }
+
+ if ( out_num > 1 )
+ {
+ flagB = checkUnique ( i, OverlapPercent );
+ }
+ else
+ {
+ flagB = 1;
+ }
+
+ if ( !flagA || !flagB )
+ {
+ counter++;
+ maskContig ( i, 1 );
+ }
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ fprintf ( stderr, "Mask repeats:\n Puzzles %d\n Masked contigs %d\n", puzzleCounter, counter );
}
/*************************************************
@@ -2137,21 +2311,25 @@ Return:
*************************************************/
static int Countlink()
{
- unsigned int i, bal_i;
- int conflict_count = 0;
+ unsigned int i, bal_i;
+ int conflict_count = 0;
- for ( i = 1; i < num_ctg; i++ )
- {
- if ( contig_array[i].mask )
- { continue; }
+ for ( i = 1; i < num_ctg; i++ )
+ {
+ if ( contig_array[i].mask )
+ {
+ continue;
+ }
- int out_num = validConnect ( i, NULL );
+ int out_num = validConnect ( i, NULL );
- if ( out_num > 1 )
- { conflict_count++; }
- }
+ if ( out_num > 1 )
+ {
+ conflict_count++;
+ }
+ }
- return conflict_count;
+ return conflict_count;
}
/*************************************************
@@ -2170,43 +2348,47 @@ Output:
Return:
None.
*************************************************/
-static void ordering ( boolean deWeak, boolean downS, boolean nonlinear, char * infile )
+static void ordering ( boolean deWeak, boolean downS, boolean nonlinear, char *infile )
{
- int conf0, conf1, conf2, conf3, conf4, conf5;
- debugging();
-
- if ( downS )
- {
- downSlide();
- debugging();
-
- if ( deWeak )
- { deleteWeakCnt ( weakPE ); }
- }
- else
- {
- if ( deWeak )
- { deleteWeakCnt ( weakPE ); }
- }
-
- debugging();
- simplifyCnt();
- debugging();
- maskRepeat();
- debugging();
- simplifyCnt();
- debugging();
-
- if ( nonlinear )
- {
- fprintf ( stderr, "Non-strict linearization.\n" );
- general_linearization ( 0 );
- }
-
- maskPuzzle ( 2, 0 );
- debugging();
- freezing();
- debugging();
+ int conf0, conf1, conf2, conf3, conf4, conf5;
+ debugging();
+
+ if ( downS )
+ {
+ downSlide();
+ debugging();
+
+ if ( deWeak )
+ {
+ deleteWeakCnt ( weakPE );
+ }
+ }
+ else
+ {
+ if ( deWeak )
+ {
+ deleteWeakCnt ( weakPE );
+ }
+ }
+
+ debugging();
+ simplifyCnt();
+ debugging();
+ maskRepeat();
+ debugging();
+ simplifyCnt();
+ debugging();
+
+ if ( nonlinear )
+ {
+ fprintf ( stderr, "Non-strict linearization.\n" );
+ general_linearization ( 0 );
+ }
+
+ maskPuzzle ( 2, 0 );
+ debugging();
+ freezing();
+ debugging();
}
@@ -2224,38 +2406,42 @@ Return:
*************************************************/
boolean checkOverlapInBetween ( double tolerance )
{
- int i, gap;
- int index;
- unsigned int node;
- int lenSum, lenOlp;
- lenSum = lenOlp = 0;
-
- for ( i = 0; i < nodeCounter; i++ )
- {
- node = nodesInSubInOrder[i];
- lenSum += contig_array[node].length;
- index = getIndexInArray ( node );
- nodeDistanceInOrder[i] = nodeDistance[index];
- }
-
- if ( lenSum < 1 )
- { return 1; }
-
- for ( i = 0; i < nodeCounter - 1; i++ )
- {
- gap = nodeDistanceInOrder[i + 1] - nodeDistanceInOrder[i]
- - contig_array[nodesInSubInOrder[i + 1]].length;
-
- if ( -gap > 0 )
- { lenOlp += -gap; }
-
- if ( ( double ) lenOlp / lenSum > tolerance )
- {
- return 0;
- }
- }
-
- return 1;
+ int i, gap;
+ int index;
+ unsigned int node;
+ int lenSum, lenOlp;
+ lenSum = lenOlp = 0;
+
+ for ( i = 0; i < nodeCounter; i++ )
+ {
+ node = nodesInSubInOrder[i];
+ lenSum += contig_array[node].length;
+ index = getIndexInArray ( node );
+ nodeDistanceInOrder[i] = nodeDistance[index];
+ }
+
+ if ( lenSum < 1 )
+ {
+ return 1;
+ }
+
+ for ( i = 0; i < nodeCounter - 1; i++ )
+ {
+ gap = nodeDistanceInOrder[i + 1] - nodeDistanceInOrder[i]
+ - contig_array[nodesInSubInOrder[i + 1]].length;
+
+ if ( -gap > 0 )
+ {
+ lenOlp += -gap;
+ }
+
+ if ( ( double ) lenOlp / lenSum > tolerance )
+ {
+ return 0;
+ }
+ }
+
+ return 1;
}
@@ -2278,114 +2464,132 @@ Output:
Return:
o if setting successed.
*************************************************/
-static boolean setUsed ( unsigned int start, unsigned int * array, int max_steps, boolean flag )
+static boolean setUsed ( unsigned int start, unsigned int *array, int max_steps, boolean flag )
{
- unsigned int prevCtg = start;
- unsigned int twinA, twinB;
- int j;
- CONNECT * cnt;
- boolean usedFlag = 0;
- // save 'used' to 'checking'
- prevCtg = start;
-
- for ( j = 0; j < max_steps; j++ )
- {
- if ( array[j] == 0 )
- { break; }
-
- cnt = getCntBetween ( prevCtg, array[j] );
-
- if ( !cnt )
- {
- fprintf ( stderr, "setUsed: no connect between %d and %d\n", prevCtg, array[j] );
- prevCtg = array[j];
- continue;
- }
-
- if ( cnt->used == flag || cnt->nextInScaf || cnt->prevInScaf || cnt->singleInScaf )
- {
- return 1;
- }
-
- cnt->checking = cnt->used;
- twinA = getTwinCtg ( prevCtg );
- twinB = getTwinCtg ( array[j] );
- cnt = getCntBetween ( twinB, twinA );
-
- if ( cnt )
- { cnt->checking = cnt->used; }
-
- prevCtg = array[j];
- }
-
- // set used to flag
- prevCtg = start;
-
- for ( j = 0; j < max_steps; j++ )
- {
- if ( array[j] == 0 )
- { break; }
-
- cnt = getCntBetween ( prevCtg, array[j] );
-
- if ( !cnt )
- {
- prevCtg = array[j];
- continue;
- }
-
- if ( cnt->used == flag )
- {
- usedFlag = 1;
- break;
- }
-
- cnt->used = flag;
- twinA = getTwinCtg ( prevCtg );
- twinB = getTwinCtg ( array[j] );
- cnt = getCntBetween ( twinB, twinA );
-
- if ( cnt )
- { cnt->used = flag; }
-
- prevCtg = array[j];
- }
-
- // set mask to 'NOT flag' or set used to original value
- prevCtg = start;
-
- for ( j = 0; j < max_steps; j++ )
- {
- if ( array[j] == 0 )
- { break; }
-
- cnt = getCntBetween ( prevCtg, array[j] );
-
- if ( !cnt )
- {
- prevCtg = array[j];
- continue;
- }
-
- if ( !usedFlag )
- { cnt->mask = 1 - flag; }
- else
- { cnt->used = cnt->checking; }
-
- twinA = getTwinCtg ( prevCtg );
- twinB = getTwinCtg ( array[j] );
- cnt = getCntBetween ( twinB, twinA );
- cnt->used = 1 - flag;
-
- if ( !usedFlag )
- { cnt->mask = 1 - flag; }
- else
- { cnt->used = cnt->checking; }
-
- prevCtg = array[j];
- }
-
- return usedFlag;
+ unsigned int prevCtg = start;
+ unsigned int twinA, twinB;
+ int j;
+ CONNECT *cnt;
+ boolean usedFlag = 0;
+ // save 'used' to 'checking'
+ prevCtg = start;
+
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( array[j] == 0 )
+ {
+ break;
+ }
+
+ cnt = getCntBetween ( prevCtg, array[j] );
+
+ if ( !cnt )
+ {
+ fprintf ( stderr, "setUsed: no connect between %d and %d\n", prevCtg, array[j] );
+ prevCtg = array[j];
+ continue;
+ }
+
+ if ( cnt->used == flag || cnt->nextInScaf || cnt->prevInScaf || cnt->singleInScaf )
+ {
+ return 1;
+ }
+
+ cnt->checking = cnt->used;
+ twinA = getTwinCtg ( prevCtg );
+ twinB = getTwinCtg ( array[j] );
+ cnt = getCntBetween ( twinB, twinA );
+
+ if ( cnt )
+ {
+ cnt->checking = cnt->used;
+ }
+
+ prevCtg = array[j];
+ }
+
+ // set used to flag
+ prevCtg = start;
+
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( array[j] == 0 )
+ {
+ break;
+ }
+
+ cnt = getCntBetween ( prevCtg, array[j] );
+
+ if ( !cnt )
+ {
+ prevCtg = array[j];
+ continue;
+ }
+
+ if ( cnt->used == flag )
+ {
+ usedFlag = 1;
+ break;
+ }
+
+ cnt->used = flag;
+ twinA = getTwinCtg ( prevCtg );
+ twinB = getTwinCtg ( array[j] );
+ cnt = getCntBetween ( twinB, twinA );
+
+ if ( cnt )
+ {
+ cnt->used = flag;
+ }
+
+ prevCtg = array[j];
+ }
+
+ // set mask to 'NOT flag' or set used to original value
+ prevCtg = start;
+
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( array[j] == 0 )
+ {
+ break;
+ }
+
+ cnt = getCntBetween ( prevCtg, array[j] );
+
+ if ( !cnt )
+ {
+ prevCtg = array[j];
+ continue;
+ }
+
+ if ( !usedFlag )
+ {
+ cnt->mask = 1 - flag;
+ }
+ else
+ {
+ cnt->used = cnt->checking;
+ }
+
+ twinA = getTwinCtg ( prevCtg );
+ twinB = getTwinCtg ( array[j] );
+ cnt = getCntBetween ( twinB, twinA );
+ cnt->used = 1 - flag;
+
+ if ( !usedFlag )
+ {
+ cnt->mask = 1 - flag;
+ }
+ else
+ {
+ cnt->used = cnt->checking;
+ }
+
+ prevCtg = array[j];
+ }
+
+ return usedFlag;
}
@@ -2406,46 +2610,56 @@ Output:
Return:
1 if this contig had enough support from other contigs.
*************************************************/
-int score_pass ( DARRAY * array, int Counter, int beforep, int afterp, int id )
+int score_pass ( DARRAY *array, int Counter, int beforep, int afterp, int id )
{
- int outnum = allConnect ( id, NULL );
- int innum = allConnect ( getTwinCtg ( id ), NULL );
- int start = beforep - 2 * innum > 0 ? beforep - 2 * innum : 0;
- int end = afterp + 2 * outnum < Counter ? afterp + 2 * outnum : Counter;
- int i, inc = 1, outc = 1;
- CONNECT * dh_cnt;
-
- for ( i = start; i < end; i++ )
- {
- if ( i < beforep )
- {
- dh_cnt = getCntBetween ( * ( unsigned int * ) darrayGet ( array, i ), id );
-
- if ( dh_cnt )
- { inc++; }
- }
-
- if ( i > afterp )
- {
- dh_cnt = getCntBetween ( id, * ( unsigned int * ) darrayGet ( array, i ) );
-
- if ( dh_cnt )
- { outc++; }
- }
- }
-
- if ( inc == innum || outc == outnum )
- { return 1; }
-
- if ( ( inc == 1 && innum > 2 ) || ( outc == 1 && outnum > 2 ) )
- { return 0; }
-
- int score = ( int ) ( ( ( double ) ( inc * outc ) / ( double ) ( innum * outnum ) ) * 100 );
-
- if ( score > 30 )
- { return 1; }
-
- return 0;
+ int outnum = allConnect ( id, NULL );
+ int innum = allConnect ( getTwinCtg ( id ), NULL );
+ int start = beforep - 2 * innum > 0 ? beforep - 2 * innum : 0;
+ int end = afterp + 2 * outnum < Counter ? afterp + 2 * outnum : Counter;
+ int i, inc = 1, outc = 1;
+ CONNECT *dh_cnt;
+
+ for ( i = start; i < end; i++ )
+ {
+ if ( i < beforep )
+ {
+ dh_cnt = getCntBetween ( * ( unsigned int * ) darrayGet ( array, i ), id );
+
+ if ( dh_cnt )
+ {
+ inc++;
+ }
+ }
+
+ if ( i > afterp )
+ {
+ dh_cnt = getCntBetween ( id, * ( unsigned int * ) darrayGet ( array, i ) );
+
+ if ( dh_cnt )
+ {
+ outc++;
+ }
+ }
+ }
+
+ if ( inc == innum || outc == outnum )
+ {
+ return 1;
+ }
+
+ if ( ( inc == 1 && innum > 2 ) || ( outc == 1 && outnum > 2 ) )
+ {
+ return 0;
+ }
+
+ int score = ( int ) ( ( ( double ) ( inc * outc ) / ( double ) ( innum * outnum ) ) * 100 );
+
+ if ( score > 30 )
+ {
+ return 1;
+ }
+
+ return 0;
}
/*************************************************
@@ -2467,283 +2681,319 @@ Return:
*************************************************/
static void recoverMask()
{
- unsigned int i, ctg, bal_ctg, start, finish;
- int num3, num5, j, t;
- CONNECT * bindCnt, *cnt;
- int min, max, max_steps = 5, num_route, length;
- int tempCounter, recoverCounter = 0;
- boolean multiUSE, change;
- int stat[] = {0, 0, 0, 0, 0, 0, 0};
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
- found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
-
- for ( j = 0; j < max_n_routes; j++ )
- { found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) ); }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
- { continue; }
-
- bindCnt = getBindCnt ( i );
-
- if ( !bindCnt )
- { continue; }
-
- //first scan get the average coverage by longer pe
- num5 = num3 = 0;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- contig_array[getTwinCtg ( i )].flag = 1;
-
- while ( bindCnt )
- {
- if ( bindCnt->used )
- { break; }
-
- setConnectUsed ( ctg, bindCnt->contigID, 1 );
- ctg = bindCnt->contigID;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- bindCnt = bindCnt->nextInScaf;
- }
-
- ctg = getTwinCtg ( i );
- bindCnt = getBindCnt ( ctg );
-
- while ( bindCnt )
- {
- if ( bindCnt->used )
- { break; }
-
- setConnectUsed ( ctg, bindCnt->contigID, 1 );
- ctg = bindCnt->contigID;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- bindCnt = bindCnt->nextInScaf;
- }
-
- if ( num5 + num3 < 2 )
- { continue; }
-
- tempCounter = solidCounter = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf3, j );
-
- for ( j = 0; j < num5; j++ )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf5, j );
-
- change = 0;
-
- for ( t = 0; t < tempCounter - 1; t++ )
- {
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
- * ( unsigned int * ) darrayGet ( tempArray, t );
- start = * ( unsigned int * ) darrayGet ( tempArray, t );
- finish = * ( unsigned int * ) darrayGet ( tempArray, t + 1 );
- num_route = num_trace = 0;
- cnt = checkConnect ( start, finish );
-
- if ( !cnt )
- {
- fprintf ( stderr, "Warning from recoverMask: no connection (%d %d), start at %d\n",
- start, finish, i );
- cnt = getCntBetween ( start, finish );
-
- if ( cnt )
- { debugging1 ( start, finish ); }
-
- continue;
- }
-
- length = cnt->gapLen + contig_array[finish].length;
- min = length - 1.5 * ins_size_var;
- max = length + 1.5 * ins_size_var;
- traceAlongMaskedCnt ( finish, start, max_steps, min, max, 0, 0, &num_route );
-
- if ( finish == start )
- {
- for ( j = 0; j < tempCounter; j++ )
- { fprintf ( stderr, "->%d", * ( unsigned int * ) darrayGet ( tempArray, j ) ); }
-
- fprintf ( stderr, ": start at %d\n", i );
- }
-
- if ( num_route == 1 )
- {
- for ( j = 0; j < max_steps; j++ )
- if ( found_routes[0][j] == 0 )
- { break; }
-
- if ( j < 1 )
- { continue; }
-
- //check if connects have been used more than once
- multiUSE = setUsed ( start, found_routes[0], max_steps, 1 );
-
- if ( multiUSE )
- { continue; }
-
- for ( j = 0; j < max_steps; j++ )
- {
- if ( j + 1 == max_steps || found_routes[0][j + 1] == 0 )
- { break; }
-
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) = found_routes[0][j];
- contig_array[found_routes[0][j]].flag = 1;
- contig_array[getTwinCtg ( found_routes[0][j] )].flag = 1;
- }
-
- recoverCounter += j;
- setConnectDelete ( start, finish, 1, 1 );
- change = 1;
- stat[0]++;
- } //end if num_route=1
- else if ( num_route > 1 )
- {
- //multi-route.
- int k, l, num, longest = 0, longestid = 0;
- int merg = 0, quality = 0;
-
- // get the longest route.
- for ( k = 0; k < num_route; k++ )
- {
- for ( j = 0; j < max_steps; j++ )
- {
- if ( j + 1 == max_steps || found_routes[k][j + 1] == 0 )
- {
- if ( j > longest )
- {
- longest = j;
- longestid = k;
- }
-
- break;
- }
- }
- }
-
- stat[1]++;
-
- if ( longest == 1 ) //multi one.
- {
- stat[2]++;
-
- for ( k = 0; k < num_route; k++ )
- {
- if ( score_pass ( tempArray, tempCounter, t, t + 1, found_routes[k][0] ) )
- {
- longestid = k;
- quality = 1;
- stat[3]++;
- break;
- }
- }
-
- if ( quality == 0 )
- {
- continue;
- }
- }
- else
- {
- stat[4]++;
-
- for ( k = 0; k < num_route; k++ )
- {
- if ( k == longestid )
- { continue; }
-
- int merg_num = 0, total = 0;
-
- for ( j = 0; j < max_steps; j++ )
- {
- if ( j + 1 == max_steps || found_routes[k][j + 1] == 0 )
- {
- total = j;
- break;
- }
-
- for ( l = 0; l < longest; l++ )
- {
- if ( found_routes[k][j] == found_routes[longestid][l] )
- {
- merg_num++;
- break;
- }
- }
- }
-
- if ( merg_num == total )
- { merg++; }
- }
- }
-
- if ( merg == num_route - 1 || quality == 1 || merg >= longest )
- {
- multiUSE = setUsed ( start, found_routes[longestid], max_steps, 1 );
-
- if ( multiUSE )
- { continue; }
-
- stat[5]++;
-
- for ( j = 0; j < longest; j++ )
- {
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) = found_routes[longestid][j];
- contig_array[found_routes[longestid][j]].flag = 1;
- contig_array[getTwinCtg ( found_routes[longestid][j] )].flag = 1;
- }
-
- stat[6] += j;
- recoverCounter += j;
- setConnectDelete ( start, finish, 1, 1 );
- change = 1;
- }
- }
- }
-
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
- * ( unsigned int * ) darrayGet ( tempArray, tempCounter - 1 );
-
- if ( change )
- { consolidate(); }
- }
-
- fprintf ( stderr, "\nRecover contigs.\n" );
- fprintf ( stderr, " Total recovered contigs %d\n", recoverCounter );
- fprintf ( stderr, " Single-route cases %d\n", stat[0] );
- fprintf ( stderr, " Multi-route cases %d\n", stat[1] );
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- cnt = contig_array[i].downwardConnect;
-
- while ( cnt )
- {
- cnt->used = 0;
- cnt->checking = 0;
- cnt = cnt->next;
- }
- }
-
- for ( j = 0; j < max_n_routes; j++ )
- { free ( ( void * ) found_routes[j] ); }
-
- free ( ( void * ) found_routes );
- free ( ( void * ) so_far );
+ unsigned int i, ctg, bal_ctg, start, finish;
+ int num3, num5, j, t;
+ CONNECT *bindCnt, *cnt;
+ int min, max, max_steps = 5, num_route, length;
+ int tempCounter, recoverCounter = 0;
+ boolean multiUSE, change;
+ int stat[] = {0, 0, 0, 0, 0, 0, 0};
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
+ found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
+
+ for ( j = 0; j < max_n_routes; j++ )
+ {
+ found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) );
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ bindCnt = getBindCnt ( i );
+
+ if ( !bindCnt )
+ {
+ continue;
+ }
+
+ //first scan get the average coverage by longer pe
+ num5 = num3 = 0;
+ ctg = i;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg ( i )].flag = 1;
+
+ while ( bindCnt )
+ {
+ if ( bindCnt->used )
+ {
+ break;
+ }
+
+ setConnectUsed ( ctg, bindCnt->contigID, 1 );
+ ctg = bindCnt->contigID;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ ctg = getTwinCtg ( i );
+ bindCnt = getBindCnt ( ctg );
+
+ while ( bindCnt )
+ {
+ if ( bindCnt->used )
+ {
+ break;
+ }
+
+ setConnectUsed ( ctg, bindCnt->contigID, 1 );
+ ctg = bindCnt->contigID;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ if ( num5 + num3 < 2 )
+ {
+ continue;
+ }
+
+ tempCounter = solidCounter = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf3, j );
+
+ for ( j = 0; j < num5; j++ )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf5, j );
+
+ change = 0;
+
+ for ( t = 0; t < tempCounter - 1; t++ )
+ {
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
+ * ( unsigned int * ) darrayGet ( tempArray, t );
+ start = * ( unsigned int * ) darrayGet ( tempArray, t );
+ finish = * ( unsigned int * ) darrayGet ( tempArray, t + 1 );
+ num_route = num_trace = 0;
+ cnt = checkConnect ( start, finish );
+
+ if ( !cnt )
+ {
+ fprintf ( stderr, "Warning from recoverMask: no connection (%d %d), start at %d\n",
+ start, finish, i );
+ cnt = getCntBetween ( start, finish );
+
+ if ( cnt )
+ {
+ debugging1 ( start, finish );
+ }
+
+ continue;
+ }
+
+ length = cnt->gapLen + contig_array[finish].length;
+ min = length - 1.5 * ins_size_var;
+ max = length + 1.5 * ins_size_var;
+ traceAlongMaskedCnt ( finish, start, max_steps, min, max, 0, 0, &num_route );
+
+ if ( finish == start )
+ {
+ for ( j = 0; j < tempCounter; j++ )
+ {
+ fprintf ( stderr, "->%d", * ( unsigned int * ) darrayGet ( tempArray, j ) );
+ }
+
+ fprintf ( stderr, ": start at %d\n", i );
+ }
+
+ if ( num_route == 1 )
+ {
+ for ( j = 0; j < max_steps; j++ )
+ if ( found_routes[0][j] == 0 )
+ {
+ break;
+ }
+
+ if ( j < 1 )
+ {
+ continue;
+ }
+
+ //check if connects have been used more than once
+ multiUSE = setUsed ( start, found_routes[0], max_steps, 1 );
+
+ if ( multiUSE )
+ {
+ continue;
+ }
+
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( j + 1 == max_steps || found_routes[0][j + 1] == 0 )
+ {
+ break;
+ }
+
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) = found_routes[0][j];
+ contig_array[found_routes[0][j]].flag = 1;
+ contig_array[getTwinCtg ( found_routes[0][j] )].flag = 1;
+ }
+
+ recoverCounter += j;
+ setConnectDelete ( start, finish, 1, 1 );
+ change = 1;
+ stat[0]++;
+ } //end if num_route=1
+ else if ( num_route > 1 )
+ {
+ //multi-route.
+ int k, l, num, longest = 0, longestid = 0;
+ int merg = 0, quality = 0;
+
+ // get the longest route.
+ for ( k = 0; k < num_route; k++ )
+ {
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( j + 1 == max_steps || found_routes[k][j + 1] == 0 )
+ {
+ if ( j > longest )
+ {
+ longest = j;
+ longestid = k;
+ }
+
+ break;
+ }
+ }
+ }
+
+ stat[1]++;
+
+ if ( longest == 1 ) //multi one.
+ {
+ stat[2]++;
+
+ for ( k = 0; k < num_route; k++ )
+ {
+ if ( score_pass ( tempArray, tempCounter, t, t + 1, found_routes[k][0] ) )
+ {
+ longestid = k;
+ quality = 1;
+ stat[3]++;
+ break;
+ }
+ }
+
+ if ( quality == 0 )
+ {
+ continue;
+ }
+ }
+ else
+ {
+ stat[4]++;
+
+ for ( k = 0; k < num_route; k++ )
+ {
+ if ( k == longestid )
+ {
+ continue;
+ }
+
+ int merg_num = 0, total = 0;
+
+ for ( j = 0; j < max_steps; j++ )
+ {
+ if ( j + 1 == max_steps || found_routes[k][j + 1] == 0 )
+ {
+ total = j;
+ break;
+ }
+
+ for ( l = 0; l < longest; l++ )
+ {
+ if ( found_routes[k][j] == found_routes[longestid][l] )
+ {
+ merg_num++;
+ break;
+ }
+ }
+ }
+
+ if ( merg_num == total )
+ {
+ merg++;
+ }
+ }
+ }
+
+ if ( merg == num_route - 1 || quality == 1 || merg >= longest )
+ {
+ multiUSE = setUsed ( start, found_routes[longestid], max_steps, 1 );
+
+ if ( multiUSE )
+ {
+ continue;
+ }
+
+ stat[5]++;
+
+ for ( j = 0; j < longest; j++ )
+ {
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) = found_routes[longestid][j];
+ contig_array[found_routes[longestid][j]].flag = 1;
+ contig_array[getTwinCtg ( found_routes[longestid][j] )].flag = 1;
+ }
+
+ stat[6] += j;
+ recoverCounter += j;
+ setConnectDelete ( start, finish, 1, 1 );
+ change = 1;
+ }
+ }
+ }
+
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
+ * ( unsigned int * ) darrayGet ( tempArray, tempCounter - 1 );
+
+ if ( change )
+ {
+ consolidate();
+ }
+ }
+
+ fprintf ( stderr, "\nRecover contigs.\n" );
+ fprintf ( stderr, " Total recovered contigs %d\n", recoverCounter );
+ fprintf ( stderr, " Single-route cases %d\n", stat[0] );
+ fprintf ( stderr, " Multi-route cases %d\n", stat[1] );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ cnt = contig_array[i].downwardConnect;
+
+ while ( cnt )
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt = cnt->next;
+ }
+ }
+
+ for ( j = 0; j < max_n_routes; j++ )
+ {
+ free ( ( void * ) found_routes[j] );
+ }
+
+ free ( ( void * ) found_routes );
+ free ( ( void * ) so_far );
}
@@ -2763,47 +3013,59 @@ Return:
*************************************************/
static void unBindLink ( unsigned int CB, unsigned int CC )
{
- CONNECT * cnt1 = getCntBetween ( CB, CC );
-
- if ( !cnt1 )
- { return; }
-
- if ( cnt1->singleInScaf )
- { cnt1->singleInScaf = 0; }
-
- CONNECT * cnt2 = getCntBetween ( getTwinCtg ( CC ), getTwinCtg ( CB ) );
-
- if ( !cnt2 )
- { return; }
-
- if ( cnt2->singleInScaf )
- { cnt2->singleInScaf = 0; }
-
- if ( cnt1->nextInScaf )
- {
- unsigned int CD = cnt1->nextInScaf->contigID;
- cnt1->nextInScaf->prevInScaf = 0;
- cnt1->nextInScaf = NULL;
- CONNECT * cnt3 = getCntBetween ( getTwinCtg ( CD ), getTwinCtg ( CC ) );
-
- if ( cnt3 )
- { cnt3->nextInScaf = NULL; }
-
- cnt2->prevInScaf = 0;
- }
-
- if ( cnt2->nextInScaf )
- {
- unsigned int bal_CA = cnt2->nextInScaf->contigID;
- cnt2->nextInScaf->prevInScaf = 0;
- cnt2->nextInScaf = NULL;
- CONNECT * cnt4 = getCntBetween ( getTwinCtg ( bal_CA ), CB );
-
- if ( cnt4 )
- { cnt4->nextInScaf = NULL; }
-
- cnt1->prevInScaf = 0;
- }
+ CONNECT *cnt1 = getCntBetween ( CB, CC );
+
+ if ( !cnt1 )
+ {
+ return;
+ }
+
+ if ( cnt1->singleInScaf )
+ {
+ cnt1->singleInScaf = 0;
+ }
+
+ CONNECT *cnt2 = getCntBetween ( getTwinCtg ( CC ), getTwinCtg ( CB ) );
+
+ if ( !cnt2 )
+ {
+ return;
+ }
+
+ if ( cnt2->singleInScaf )
+ {
+ cnt2->singleInScaf = 0;
+ }
+
+ if ( cnt1->nextInScaf )
+ {
+ unsigned int CD = cnt1->nextInScaf->contigID;
+ cnt1->nextInScaf->prevInScaf = 0;
+ cnt1->nextInScaf = NULL;
+ CONNECT *cnt3 = getCntBetween ( getTwinCtg ( CD ), getTwinCtg ( CC ) );
+
+ if ( cnt3 )
+ {
+ cnt3->nextInScaf = NULL;
+ }
+
+ cnt2->prevInScaf = 0;
+ }
+
+ if ( cnt2->nextInScaf )
+ {
+ unsigned int bal_CA = cnt2->nextInScaf->contigID;
+ cnt2->nextInScaf->prevInScaf = 0;
+ cnt2->nextInScaf = NULL;
+ CONNECT *cnt4 = getCntBetween ( getTwinCtg ( bal_CA ), CB );
+
+ if ( cnt4 )
+ {
+ cnt4->nextInScaf = NULL;
+ }
+
+ cnt1->prevInScaf = 0;
+ }
}
/*************************************************
@@ -2820,190 +3082,200 @@ Return:
*************************************************/
static void freezing()
{
- int num5, num3;
- unsigned int ctg, bal_ctg;
- unsigned int i;
- int j, t;
- CONNECT * cnt, *prevCNT, *nextCnt;
- boolean excep;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- contig_array[i].flag = 0;
- contig_array[i].from_vt = 0;
- contig_array[i].to_vt = 0;
- cnt = contig_array[i].downwardConnect;
-
- while ( cnt )
- {
- cnt->used = 0;
- cnt->checking = 0;
- cnt->singleInScaf = 0;
- cnt = cnt->next;
- }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag || contig_array[i].mask )
- { continue; }
-
- if ( !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
- {
- continue;
- }
-
- num5 = num3 = 0;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- contig_array[getTwinCtg ( i )].flag = 1;
- prevCNT = NULL;
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- if ( contig_array[cnt->contigID].flag )
- {
- unBindLink ( ctg, cnt->contigID );
- break;
- }
-
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
- setConnectUsed ( ctg, cnt->contigID, 1 );
- ctg = cnt->contigID;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- ctg = getTwinCtg ( i );
-
- if ( num5 >= 2 )
- { prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg ); }
- else
- { prevCNT = NULL; }
-
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- if ( contig_array[cnt->contigID].flag )
- {
- unBindLink ( ctg, cnt->contigID );
- break;
- }
-
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
- setConnectUsed ( ctg, cnt->contigID, 1 );
- ctg = cnt->contigID;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- if ( num5 + num3 < 2 )
- { continue; }
-
- solidCounter = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf3, j );
-
- for ( j = 0; j < num5; j++ )
- * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf5, j );
-
- unsigned int firstCtg = 0;
- unsigned int lastCtg = 0;
- unsigned int firstTwin = 0;
- unsigned int lastTwin = 0;
-
- for ( t = 0; t < solidCounter; t++ )
- if ( !contig_array[* ( unsigned int * ) darrayGet ( solidArray, t )].mask )
- {
- firstCtg = * ( unsigned int * ) darrayGet ( solidArray, t );
- break;
- }
-
- for ( t = solidCounter - 1; t >= 0; t-- )
- if ( !contig_array[* ( unsigned int * ) darrayGet ( solidArray, t )].mask )
- {
- lastCtg = * ( unsigned int * ) darrayGet ( solidArray, t );
- break;
- }
-
- if ( firstCtg == 0 || lastCtg == 0 )
- {
- fprintf ( stderr, "scaffold start at %d, stop at %d, freezing began with %d\n", firstCtg, lastCtg, i );
-
- for ( j = 0; j < solidCounter; j++ )
- fprintf ( stderr, "->%d(%d %d)", * ( unsigned int * ) darrayGet ( solidArray, j )
- , contig_array[* ( unsigned int * ) darrayGet ( solidArray, j )].mask
- , contig_array[* ( unsigned int * ) darrayGet ( solidArray, j )].flag );
-
- fprintf ( stderr, "\n" );
- }
- else
- {
- firstTwin = getTwinCtg ( firstCtg );
- lastTwin = getTwinCtg ( lastCtg );
- }
-
- for ( t = 0; t < solidCounter; t++ )
- {
- unsigned int ctg = * ( unsigned int * ) darrayGet ( solidArray, t );
-
- if ( contig_array[ctg].from_vt > 0 )
- {
- contig_array[ctg].mask = 1;
- contig_array[getTwinCtg ( ctg )].mask = 1;
- fprintf ( stderr, "Repeat: contig %d (%d) appears more than once\n", ctg, getTwinCtg ( ctg ) );
- }
- else
- {
- contig_array[ctg].from_vt = firstCtg;
- contig_array[ctg].to_vt = lastCtg;
- contig_array[ctg].indexInScaf = t + 1;
- contig_array[getTwinCtg ( ctg )].from_vt = lastTwin;
- contig_array[getTwinCtg ( ctg )].to_vt = firstTwin;
- contig_array[getTwinCtg ( ctg )].indexInScaf = solidCounter - t;
- }
- }
-
- consolidate();
- }
-
- fprintf ( stderr, "Freezing done.\n" );
- fflush ( stdout );
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag )
- { contig_array[i].flag = 0; }
-
- if ( contig_array[i].from_vt == 0 )
- {
- contig_array[i].from_vt = i;
- contig_array[i].to_vt = i;
- }
-
- cnt = contig_array[i].downwardConnect;
-
- while ( cnt )
- {
- cnt->used = 0;
- cnt->checking = 0;
- cnt = cnt->next;
- }
- }
+ int num5, num3;
+ unsigned int ctg, bal_ctg;
+ unsigned int i;
+ int j, t;
+ CONNECT *cnt, *prevCNT, *nextCnt;
+ boolean excep;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ contig_array[i].from_vt = 0;
+ contig_array[i].to_vt = 0;
+ cnt = contig_array[i].downwardConnect;
+
+ while ( cnt )
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt->singleInScaf = 0;
+ cnt = cnt->next;
+ }
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag || contig_array[i].mask )
+ {
+ continue;
+ }
+
+ if ( !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
+ {
+ continue;
+ }
+
+ num5 = num3 = 0;
+ ctg = i;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg ( i )].flag = 1;
+ prevCNT = NULL;
+ cnt = getNextContig ( ctg, prevCNT, &excep );
+
+ while ( cnt )
+ {
+ if ( contig_array[cnt->contigID].flag )
+ {
+ unBindLink ( ctg, cnt->contigID );
+ break;
+ }
+
+ nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+ setConnectUsed ( ctg, cnt->contigID, 1 );
+ ctg = cnt->contigID;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ ctg = getTwinCtg ( i );
+
+ if ( num5 >= 2 )
+ {
+ prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg );
+ }
+ else
+ {
+ prevCNT = NULL;
+ }
+
+ cnt = getNextContig ( ctg, prevCNT, &excep );
+
+ while ( cnt )
+ {
+ if ( contig_array[cnt->contigID].flag )
+ {
+ unBindLink ( ctg, cnt->contigID );
+ break;
+ }
+
+ nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+ setConnectUsed ( ctg, cnt->contigID, 1 );
+ ctg = cnt->contigID;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ if ( num5 + num3 < 2 )
+ {
+ continue;
+ }
+
+ solidCounter = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf3, j );
+
+ for ( j = 0; j < num5; j++ )
+ * ( unsigned int * ) darrayPut ( solidArray, solidCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf5, j );
+
+ unsigned int firstCtg = 0;
+ unsigned int lastCtg = 0;
+ unsigned int firstTwin = 0;
+ unsigned int lastTwin = 0;
+
+ for ( t = 0; t < solidCounter; t++ )
+ if ( !contig_array[* ( unsigned int * ) darrayGet ( solidArray, t )].mask )
+ {
+ firstCtg = * ( unsigned int * ) darrayGet ( solidArray, t );
+ break;
+ }
+
+ for ( t = solidCounter - 1; t >= 0; t-- )
+ if ( !contig_array[* ( unsigned int * ) darrayGet ( solidArray, t )].mask )
+ {
+ lastCtg = * ( unsigned int * ) darrayGet ( solidArray, t );
+ break;
+ }
+
+ if ( firstCtg == 0 || lastCtg == 0 )
+ {
+ fprintf ( stderr, "scaffold start at %d, stop at %d, freezing began with %d\n", firstCtg, lastCtg, i );
+
+ for ( j = 0; j < solidCounter; j++ )
+ fprintf ( stderr, "->%d(%d %d)", * ( unsigned int * ) darrayGet ( solidArray, j )
+ , contig_array[* ( unsigned int * ) darrayGet ( solidArray, j )].mask
+ , contig_array[* ( unsigned int * ) darrayGet ( solidArray, j )].flag );
+
+ fprintf ( stderr, "\n" );
+ }
+ else
+ {
+ firstTwin = getTwinCtg ( firstCtg );
+ lastTwin = getTwinCtg ( lastCtg );
+ }
+
+ for ( t = 0; t < solidCounter; t++ )
+ {
+ unsigned int ctg = * ( unsigned int * ) darrayGet ( solidArray, t );
+
+ if ( contig_array[ctg].from_vt > 0 )
+ {
+ contig_array[ctg].mask = 1;
+ contig_array[getTwinCtg ( ctg )].mask = 1;
+ fprintf ( stderr, "Repeat: contig %d (%d) appears more than once\n", ctg, getTwinCtg ( ctg ) );
+ }
+ else
+ {
+ contig_array[ctg].from_vt = firstCtg;
+ contig_array[ctg].to_vt = lastCtg;
+ contig_array[ctg].indexInScaf = t + 1;
+ contig_array[getTwinCtg ( ctg )].from_vt = lastTwin;
+ contig_array[getTwinCtg ( ctg )].to_vt = firstTwin;
+ contig_array[getTwinCtg ( ctg )].indexInScaf = solidCounter - t;
+ }
+ }
+
+ consolidate();
+ }
+
+ fprintf ( stderr, "Freezing done.\n" );
+ fflush ( stdout );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ if ( contig_array[i].from_vt == 0 )
+ {
+ contig_array[i].from_vt = i;
+ contig_array[i].to_vt = i;
+ }
+
+ cnt = contig_array[i].downwardConnect;
+
+ while ( cnt )
+ {
+ cnt->used = 0;
+ cnt->checking = 0;
+ cnt = cnt->next;
+ }
+ }
}
/************** codes below this line are for pulling the scaffolds out ************/
@@ -3020,31 +3292,35 @@ Output:
Return:
None.
*************************************************/
-void output1gap ( FILE * fo, int max_steps )
+void output1gap ( FILE *fo, int max_steps )
{
- int i, len, seg;
- len = seg = 0;
+ int i, len, seg;
+ len = seg = 0;
- for ( i = 0; i < max_steps - 1; i++ )
- {
- if ( found_routes[0][i + 1] == 0 )
- { break; }
+ for ( i = 0; i < max_steps - 1; i++ )
+ {
+ if ( found_routes[0][i + 1] == 0 )
+ {
+ break;
+ }
- len += contig_array[found_routes[0][i]].length;
- seg++;
- }
+ len += contig_array[found_routes[0][i]].length;
+ seg++;
+ }
- fprintf ( fo, "GAP %d %d", len, seg );
+ fprintf ( fo, "GAP %d %d", len, seg );
- for ( i = 0; i < max_steps - 1; i++ )
- {
- if ( found_routes[0][i + 1] == 0 )
- { break; }
+ for ( i = 0; i < max_steps - 1; i++ )
+ {
+ if ( found_routes[0][i + 1] == 0 )
+ {
+ break;
+ }
- fprintf ( fo, " %d", found_routes[0][i] );
- }
+ fprintf ( fo, " %d", found_routes[0][i] );
+ }
- fprintf ( fo, "\n" );
+ fprintf ( fo, "\n" );
}
static int weakCounter;
@@ -3062,72 +3338,78 @@ Output:
Return:
0 if contig's downstream connection was not weak connection.
*************************************************/
-static boolean printCnts ( FILE * fp, unsigned int ctg )
+static boolean printCnts ( FILE *fp, unsigned int ctg )
{
- CONNECT * cnt = contig_array[ctg].downwardConnect;
- boolean flag = 0, ret = 0;
- unsigned int bal_ctg = getTwinCtg ( ctg );
- unsigned int linkCtg;
-
- if ( isSameAsTwin ( ctg ) )
- { return ret; }
-
- CONNECT * bindCnt = getBindCnt ( ctg );
-
- if ( bindCnt && bindCnt->bySmall && bindCnt->weakPoint )
- {
- weakCounter++;
- fprintf ( fp, "\tWP" );
- ret = 1;
- }
-
- while ( cnt )
- {
- if ( cnt->weight && !cnt->inherit )
- {
- if ( !flag )
- {
- flag = 1;
- fprintf ( fp, "\t#DOWN " );
- }
-
- linkCtg = cnt->contigID;
-
- if ( isLargerThanTwin ( linkCtg ) )
- { linkCtg = getTwinCtg ( linkCtg ); }
-
- fprintf ( fp, "%d:%d:%d ", index_array[linkCtg], cnt->weight, cnt->gapLen );
- }
-
- cnt = cnt->next;
- }
-
- flag = 0;
- cnt = contig_array[bal_ctg].downwardConnect;
-
- while ( cnt )
- {
- if ( cnt->weight && !cnt->inherit )
- {
- if ( !flag )
- {
- flag = 1;
- fprintf ( fp, "\t#UP " );
- }
-
- linkCtg = cnt->contigID;
-
- if ( isLargerThanTwin ( linkCtg ) )
- { linkCtg = getTwinCtg ( linkCtg ); }
-
- fprintf ( fp, "%d:%d:%d ", index_array[linkCtg], cnt->weight, cnt->gapLen );
- }
-
- cnt = cnt->next;
- }
-
- fprintf ( fp, "\n" );
- return ret;
+ CONNECT *cnt = contig_array[ctg].downwardConnect;
+ boolean flag = 0, ret = 0;
+ unsigned int bal_ctg = getTwinCtg ( ctg );
+ unsigned int linkCtg;
+
+ if ( isSameAsTwin ( ctg ) )
+ {
+ return ret;
+ }
+
+ CONNECT *bindCnt = getBindCnt ( ctg );
+
+ if ( bindCnt && bindCnt->bySmall && bindCnt->weakPoint )
+ {
+ weakCounter++;
+ fprintf ( fp, "\tWP" );
+ ret = 1;
+ }
+
+ while ( cnt )
+ {
+ if ( cnt->weight && !cnt->inherit )
+ {
+ if ( !flag )
+ {
+ flag = 1;
+ fprintf ( fp, "\t#DOWN " );
+ }
+
+ linkCtg = cnt->contigID;
+
+ if ( isLargerThanTwin ( linkCtg ) )
+ {
+ linkCtg = getTwinCtg ( linkCtg );
+ }
+
+ fprintf ( fp, "%d:%d:%d ", index_array[linkCtg], cnt->weight, cnt->gapLen );
+ }
+
+ cnt = cnt->next;
+ }
+
+ flag = 0;
+ cnt = contig_array[bal_ctg].downwardConnect;
+
+ while ( cnt )
+ {
+ if ( cnt->weight && !cnt->inherit )
+ {
+ if ( !flag )
+ {
+ flag = 1;
+ fprintf ( fp, "\t#UP " );
+ }
+
+ linkCtg = cnt->contigID;
+
+ if ( isLargerThanTwin ( linkCtg ) )
+ {
+ linkCtg = getTwinCtg ( linkCtg );
+ }
+
+ fprintf ( fp, "%d:%d:%d ", index_array[linkCtg], cnt->weight, cnt->gapLen );
+ }
+
+ cnt = cnt->next;
+ }
+
+ fprintf ( fp, "\n" );
+ return ret;
}
/*************************************************
@@ -3144,667 +3426,671 @@ Output:
Return:
None.
*************************************************/
-void ScafStat ( int len_cut, char * graphfile )
+void ScafStat ( int len_cut, char *graphfile )
{
- FILE * fp, *fp2, *fo;
- char line[1024];
- sprintf ( line, "%s.scafSeq", graphfile );
- fp = ckopen ( line, "r" );
- sprintf ( line, "%s.contig", graphfile );
- fp2 = ckopen ( line, "r" );
- sprintf ( line, "%s.scafStatistics", graphfile );
- fo = ckopen ( line, "w" );
- fprintf ( fo, "<-- Information for assembly Scaffold '%s.scafSeq'.(cut_off_length < 100bp) -->\n\n", graphfile );
- int cut_off_len = 0;
- char Nucleotide;
- char buf[4000];
- long Scaffold_Number = 0;
- long Scaffold_Number_Scaf = 0;
- long Scaffold_Number_Contig = 0;
- long Singleton_Number_Scaf = 0;
- long Singleton_Number = 0;
- long * Singleton_Seq = ( long * ) malloc ( sizeof ( long ) );
- long long A_num_all = 0;
- long * A_num = ( long * ) malloc ( sizeof ( long ) );
- long long C_num_all = 0;
- long * C_num = ( long * ) malloc ( sizeof ( long ) );
- long long G_num_all = 0;
- long * G_num = ( long * ) malloc ( sizeof ( long ) );
- long long T_num_all = 0;
- long * T_num = ( long * ) malloc ( sizeof ( long ) );
- long long N_num_all = 0;
- long * N_num = ( long * ) malloc ( sizeof ( long ) );
- long long Non_ACGTN_all = 0;
- long * Non_ACGTN = ( long * ) malloc ( sizeof ( long ) );
- long long Size_includeN = 0;
- long * Size_Seq = ( long * ) malloc ( sizeof ( long ) );
- int k;
- long long Sum = 0;
- int flag[10];
- int flag_known = 0;
- long n100 = 0;
- long n500 = 0;
- long n1k = 0;
- long n10k = 0;
- long n100k = 0;
- long n1m = 0;
- long N50 = 0;
- long N50_known = 0;
- long Num_N50_known = 0;
- cut_off_len = len_cut;
- A_num[Scaffold_Number] = 0;
- C_num[Scaffold_Number] = 0;
- G_num[Scaffold_Number] = 0;
- T_num[Scaffold_Number] = 0;
- N_num[Scaffold_Number] = 0;
- Non_ACGTN[Scaffold_Number] = 0;
- Size_Seq[Scaffold_Number] = 0;
- Singleton_Seq[Scaffold_Number] = 0;
- Nucleotide = fgetc ( fp );
-
- while ( Nucleotide != (char) EOF ) /* Bug Fix */
- {
- if ( Nucleotide == '>' )
- {
- if ( ( Scaffold_Number > 0 ) && ( Size_Seq[Scaffold_Number - 1] < cut_off_len ) )
- {
- A_num_all = A_num_all - A_num[Scaffold_Number - 1];
- C_num_all = C_num_all - C_num[Scaffold_Number - 1];
- G_num_all = G_num_all - G_num[Scaffold_Number - 1];
- T_num_all = T_num_all - T_num[Scaffold_Number - 1];
- N_num_all = N_num_all - N_num[Scaffold_Number - 1];
- Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
- Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
- Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
- Scaffold_Number = Scaffold_Number - 1;
- }
- else
- {
- Size_Seq = ( long * ) realloc ( Size_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- A_num = ( long * ) realloc ( A_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- C_num = ( long * ) realloc ( C_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- G_num = ( long * ) realloc ( G_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- T_num = ( long * ) realloc ( T_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- N_num = ( long * ) realloc ( N_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- Non_ACGTN = ( long * ) realloc ( Non_ACGTN, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- Singleton_Seq = ( long * ) realloc ( Singleton_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- }
-
- Scaffold_Number++;
- A_num[Scaffold_Number - 1] = 0;
- C_num[Scaffold_Number - 1] = 0;
- G_num[Scaffold_Number - 1] = 0;
- T_num[Scaffold_Number - 1] = 0;
- N_num[Scaffold_Number - 1] = 0;
- Non_ACGTN[Scaffold_Number - 1] = 0;
- Size_Seq[Scaffold_Number - 1] = 0;
- Singleton_Seq[Scaffold_Number - 1] = 0;
- Nucleotide = fgetc ( fp );
-
- if ( Nucleotide == 'C' )
- {
- Singleton_Number++;
- Singleton_Seq[Scaffold_Number - 1] ++;
- }
-
- fgets ( buf, 4000, fp );
- }
- else if ( ( Nucleotide == 'N' ) || ( Nucleotide == 'n' ) )
- {
- N_num[Scaffold_Number - 1] ++;
- N_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'A' ) || ( Nucleotide == 'a' ) )
- {
- A_num[Scaffold_Number - 1] ++;
- A_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'C' ) || ( Nucleotide == 'c' ) )
- {
- C_num[Scaffold_Number - 1] ++;
- C_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'G' ) || ( Nucleotide == 'g' ) )
- {
- G_num[Scaffold_Number - 1] ++;
- G_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'T' ) || ( Nucleotide == 't' ) )
- {
- T_num[Scaffold_Number - 1] ++;
- T_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else
- {
- if ( ( Nucleotide != '\n' ) && ( Nucleotide != '\r' ) )
- {
- Non_ACGTN[Scaffold_Number - 1] ++;
- Non_ACGTN_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- }
-
- Nucleotide = fgetc ( fp );
- }
-
- if ( Size_Seq[Scaffold_Number - 1] < cut_off_len )
- {
- A_num_all = A_num_all - A_num[Scaffold_Number - 1];
- C_num_all = C_num_all - C_num[Scaffold_Number - 1];
- G_num_all = G_num_all - G_num[Scaffold_Number - 1];
- T_num_all = T_num_all - T_num[Scaffold_Number - 1];
- N_num_all = N_num_all - N_num[Scaffold_Number - 1];
- Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
- Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
- Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
- Scaffold_Number = Scaffold_Number - 1;
- }
-
- qsort ( Size_Seq, Scaffold_Number, sizeof ( Size_Seq[0] ), cmp_int );
- fprintf ( fo, "Size_includeN\t%lld\n", Size_includeN );
- fprintf ( fo, "Size_withoutN\t%lld\n", Size_includeN - N_num_all );
- fprintf ( fo, "Scaffold_Num\t%ld\n", Scaffold_Number );
- fprintf ( fo, "Mean_Size\t%lld\n", Size_includeN / Scaffold_Number );
- fprintf ( fo, "Median_Size\t%ld\n", Size_Seq[ ( Scaffold_Number + 1 ) / 2 - 1] );
- fprintf ( fo, "Longest_Seq\t%ld\n", Size_Seq[Scaffold_Number - 1] );
- fprintf ( fo, "Shortest_Seq\t%ld\n", Size_Seq[0] );
- fprintf ( fo, "Singleton_Num\t%ld\n", Singleton_Number );
- fprintf ( fo, "Average_length_of_break(N)_in_scaffold\t%lld\n", N_num_all / Scaffold_Number );
- fprintf ( fo, "\n" );
-
- if ( known_genome_size )
- {
- fprintf ( fo, "Known_genome_size\t%ld\n", known_genome_size );
- fprintf ( fo, "Total_scaffold_length_as_percentage_of_known_genome_size\t%.2f%\n", 100 * ( 1.0 * Size_includeN / known_genome_size ) );
- }
- else
- {
- fprintf ( fo, "Known_genome_size\tNaN\n" );
- fprintf ( fo, "Total_scaffold_length_as_percentage_of_known_genome_size\tNaN\n" );
- }
-
- fprintf ( fo, "\n" );
-
- for ( k = 0; k < Scaffold_Number; k++ )
- {
- if ( Size_Seq[k] > 100 )
- {
- n100++;
- }
-
- if ( Size_Seq[k] > 500 )
- {
- n500++;
- }
-
- if ( Size_Seq[k] > 1000 )
- {
- n1k++;
- }
-
- if ( Size_Seq[k] > 10000 )
- {
- n10k++;
- }
-
- if ( Size_Seq[k] > 100000 )
- {
- n100k++;
- }
-
- if ( Size_Seq[k] > 1000000 )
- {
- n1m++;
- }
- }
-
- fprintf ( fo, "scaffolds>100 \t%ld\t%.2f%\n", n100 , 100 * ( 1.0 * n100 / Scaffold_Number ) );
- fprintf ( fo, "scaffolds>500 \t%ld\t%.2f%\n", n500 , 100 * ( 1.0 * n500 / Scaffold_Number ) );
- fprintf ( fo, "scaffolds>1K \t%ld\t%.2f%\n", n1k , 100 * ( 1.0 * n1k / Scaffold_Number ) );
- fprintf ( fo, "scaffolds>10K \t%ld\t%.2f%\n", n10k , 100 * ( 1.0 * n10k / Scaffold_Number ) );
- fprintf ( fo, "scaffolds>100K\t%ld\t%.2f%\n", n100k, 100 * ( 1.0 * n100k / Scaffold_Number ) );
- fprintf ( fo, "scaffolds>1M \t%ld\t%.2f%\n", n1m , 100 * ( 1.0 * n1m / Scaffold_Number ) );
- fprintf ( fo, "\n" );
- fprintf ( fo, "Nucleotide_A\t%lld\t%.2f%\n", A_num_all, 100 * ( 1.0 * A_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_C\t%lld\t%.2f%\n", C_num_all, 100 * ( 1.0 * C_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_G\t%lld\t%.2f%\n", G_num_all, 100 * ( 1.0 * G_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_T\t%lld\t%.2f%\n", T_num_all, 100 * ( 1.0 * T_num_all / Size_includeN ) );
- fprintf ( fo, "GapContent_N\t%lld\t%.2f%\n", N_num_all, 100 * ( 1.0 * N_num_all / Size_includeN ) );
- fprintf ( fo, "Non_ACGTN\t%lld\t%.2f%\n", Non_ACGTN_all, 100 * ( 1.0 * Non_ACGTN_all / Size_includeN ) );
- fprintf ( fo, "GC_Content\t%.2f%\t\t(G+C)/(A+C+G+T)\n", 100 * ( 1.0 * ( G_num_all + C_num_all ) / ( A_num_all + C_num_all + G_num_all + T_num_all ) ) );
- fprintf ( fo, "\n" );
-
- for ( k = 0; k < 10; k++ )
- { flag[k] = 0; }
-
- for ( k = Scaffold_Number - 1; k >= 0; k-- )
- {
- Sum = Sum + Size_Seq[k];
-
- if ( ( Sum >= Size_includeN * 0.1 ) && ( Sum < Size_includeN * 0.2 ) && ( flag[1] == 0 ) )
- {
- fprintf ( fo, "N10\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[1] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.2 ) && ( Sum < Size_includeN * 0.3 ) && ( flag[2] == 0 ) )
- {
- fprintf ( fo, "N20\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[2] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.3 ) && ( Sum < Size_includeN * 0.4 ) && ( flag[3] == 0 ) )
- {
- fprintf ( fo, "N30\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[3] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.4 ) && ( Sum < Size_includeN * 0.5 ) && ( flag[4] == 0 ) )
- {
- fprintf ( fo, "N40\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[4] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.5 ) && ( Sum < Size_includeN * 0.6 ) && ( flag[5] == 0 ) )
- {
- fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[5] = 1;
- N50 = Size_Seq[k];
- }
- else if ( ( Sum >= Size_includeN * 0.6 ) && ( Sum < Size_includeN * 0.7 ) && ( flag[6] == 0 ) )
- {
- fprintf ( fo, "N60\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[6] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.7 ) && ( Sum < Size_includeN * 0.8 ) && ( flag[7] == 0 ) )
- {
- fprintf ( fo, "N70\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[7] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.8 ) && ( Sum < Size_includeN * 0.9 ) && ( flag[8] == 0 ) )
- {
- fprintf ( fo, "N80\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[8] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.9 ) && ( flag[9] == 0 ) )
- {
- fprintf ( fo, "N90\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[9] = 1;
- }
-
- if ( ( Sum >= known_genome_size * 0.5 ) && ( flag_known == 0 ) )
- {
- N50_known = Size_Seq[k];
- Num_N50_known = Scaffold_Number - k;
- flag_known = 1;
- }
- }
-
- if ( flag[5] == 0 )
- {
- Sum = 0;
-
- for ( k = Scaffold_Number - 1; k >= 0; k-- )
- {
- Sum = Sum + Size_Seq[k];
-
- if ( Sum >= Size_includeN * 0.5 )
- {
- fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- break;
- }
- }
- }
-
- fprintf ( fo, "\n" );
-
- if ( known_genome_size )
- {
- fprintf ( fo, "NG50\t%ld\t%ld\n", N50_known, Num_N50_known );
- fprintf ( fo, "N50_scaffold-NG50_scaffold_length_difference\t%ld\n", abs ( N50 - N50_known ) );
- }
- else
- {
- fprintf ( fo, "NG50\tNaN\tNaN\n" );
- fprintf ( fo, "N50_scaffold-NG50_scaffold_length_difference\tNaN\n" );
- }
-
- fprintf ( fo, "\n" );
- free ( A_num );
- free ( C_num );
- free ( G_num );
- free ( T_num );
- free ( N_num );
- free ( Non_ACGTN );
- free ( Singleton_Seq );
- free ( Size_Seq );
- Scaffold_Number_Scaf = Scaffold_Number;
- Singleton_Number_Scaf = Singleton_Number;
- /*********************** Contig ******************************/
- fprintf ( fo, "<-- Information for assembly Contig '%s.contig'.(cut_off_length < 100bp) -->\n\n", graphfile );
- cut_off_len = 0;
- Scaffold_Number = 0;
- Singleton_Number = 0;
- Singleton_Seq = ( long * ) malloc ( sizeof ( long ) );
- A_num_all = 0;
- A_num = ( long * ) malloc ( sizeof ( long ) );
- C_num_all = 0;
- C_num = ( long * ) malloc ( sizeof ( long ) );
- G_num_all = 0;
- G_num = ( long * ) malloc ( sizeof ( long ) );
- T_num_all = 0;
- T_num = ( long * ) malloc ( sizeof ( long ) );
- N_num_all = 0;
- N_num = ( long * ) malloc ( sizeof ( long ) );
- Non_ACGTN_all = 0;
- Non_ACGTN = ( long * ) malloc ( sizeof ( long ) );
- Size_includeN = 0;
- Size_Seq = ( long * ) malloc ( sizeof ( long ) );
- Sum = 0;
- n100 = 0;
- n500 = 0;
- n1k = 0;
- n10k = 0;
- n100k = 0;
- n1m = 0;
- N50 = 0;
- N50_known = 0;
- Num_N50_known = 0;
- flag_known = 0;
- cut_off_len = len_cut;
- A_num[Scaffold_Number] = 0;
- C_num[Scaffold_Number] = 0;
- G_num[Scaffold_Number] = 0;
- T_num[Scaffold_Number] = 0;
- N_num[Scaffold_Number] = 0;
- Non_ACGTN[Scaffold_Number] = 0;
- Size_Seq[Scaffold_Number] = 0;
- Singleton_Seq[Scaffold_Number] = 0;
- Nucleotide = fgetc ( fp2 );
-
- while ( Nucleotide != (char) EOF ) /* Bug Fix */
- {
- if ( Nucleotide == '>' )
- {
- if ( ( Scaffold_Number > 0 ) && ( Size_Seq[Scaffold_Number - 1] < cut_off_len ) )
- {
- A_num_all = A_num_all - A_num[Scaffold_Number - 1];
- C_num_all = C_num_all - C_num[Scaffold_Number - 1];
- G_num_all = G_num_all - G_num[Scaffold_Number - 1];
- T_num_all = T_num_all - T_num[Scaffold_Number - 1];
- N_num_all = N_num_all - N_num[Scaffold_Number - 1];
- Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
- Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
- Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
- Scaffold_Number = Scaffold_Number - 1;
- }
- else
- {
- Size_Seq = ( long * ) realloc ( Size_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- A_num = ( long * ) realloc ( A_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- C_num = ( long * ) realloc ( C_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- G_num = ( long * ) realloc ( G_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- T_num = ( long * ) realloc ( T_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- N_num = ( long * ) realloc ( N_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- Non_ACGTN = ( long * ) realloc ( Non_ACGTN, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- Singleton_Seq = ( long * ) realloc ( Singleton_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
- }
-
- Scaffold_Number++;
- A_num[Scaffold_Number - 1] = 0;
- C_num[Scaffold_Number - 1] = 0;
- G_num[Scaffold_Number - 1] = 0;
- T_num[Scaffold_Number - 1] = 0;
- N_num[Scaffold_Number - 1] = 0;
- Non_ACGTN[Scaffold_Number - 1] = 0;
- Size_Seq[Scaffold_Number - 1] = 0;
- Singleton_Seq[Scaffold_Number - 1] = 0;
- Nucleotide = fgetc ( fp2 );
-
- if ( Nucleotide == 'C' )
- {
- Singleton_Number++;
- Singleton_Seq[Scaffold_Number - 1] ++;
- }
-
- fgets ( buf, 4000, fp2 );
- }
- else if ( ( Nucleotide == 'N' ) || ( Nucleotide == 'n' ) )
- {
- N_num[Scaffold_Number - 1] ++;
- N_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'A' ) || ( Nucleotide == 'a' ) )
- {
- A_num[Scaffold_Number - 1] ++;
- A_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'C' ) || ( Nucleotide == 'c' ) )
- {
- C_num[Scaffold_Number - 1] ++;
- C_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'G' ) || ( Nucleotide == 'g' ) )
- {
- G_num[Scaffold_Number - 1] ++;
- G_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else if ( ( Nucleotide == 'T' ) || ( Nucleotide == 't' ) )
- {
- T_num[Scaffold_Number - 1] ++;
- T_num_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- else
- {
- if ( ( Nucleotide != '\n' ) && ( Nucleotide != '\r' ) )
- {
- Non_ACGTN[Scaffold_Number - 1] ++;
- Non_ACGTN_all++;
- Size_Seq[Scaffold_Number - 1] ++;
- Size_includeN++;
- }
- }
-
- Nucleotide = fgetc ( fp2 );
- }
-
- if ( Size_Seq[Scaffold_Number - 1] < cut_off_len )
- {
- A_num_all = A_num_all - A_num[Scaffold_Number - 1];
- C_num_all = C_num_all - C_num[Scaffold_Number - 1];
- G_num_all = G_num_all - G_num[Scaffold_Number - 1];
- T_num_all = T_num_all - T_num[Scaffold_Number - 1];
- N_num_all = N_num_all - N_num[Scaffold_Number - 1];
- Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
- Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
- Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
- Scaffold_Number = Scaffold_Number - 1;
- }
-
- qsort ( Size_Seq, Scaffold_Number, sizeof ( Size_Seq[0] ), cmp_int );
- fprintf ( fo, "Size_includeN\t%lld\n", Size_includeN );
- fprintf ( fo, "Size_withoutN\t%lld\n", Size_includeN - N_num_all );
- fprintf ( fo, "Contig_Num\t%ld\n", Scaffold_Number );
- fprintf ( fo, "Mean_Size\t%lld\n", Size_includeN / Scaffold_Number );
- fprintf ( fo, "Median_Size\t%ld\n", Size_Seq[ ( Scaffold_Number + 1 ) / 2 - 1] );
- fprintf ( fo, "Longest_Seq\t%ld\n", Size_Seq[Scaffold_Number - 1] );
- fprintf ( fo, "Shortest_Seq\t%ld\n", Size_Seq[0] );
- fprintf ( fo, "\n" );
-
- for ( k = 0; k < Scaffold_Number; k++ )
- {
- if ( Size_Seq[k] > 100 )
- {
- n100++;
- }
-
- if ( Size_Seq[k] > 500 )
- {
- n500++;
- }
-
- if ( Size_Seq[k] > 1000 )
- {
- n1k++;
- }
-
- if ( Size_Seq[k] > 10000 )
- {
- n10k++;
- }
-
- if ( Size_Seq[k] > 100000 )
- {
- n100k++;
- }
-
- if ( Size_Seq[k] > 1000000 )
- {
- n1m++;
- }
- }
-
- fprintf ( fo, "Contig>100 \t%ld\t%.2f%\n", n100 , 100 * ( 1.0 * n100 / Scaffold_Number ) );
- fprintf ( fo, "Contig>500 \t%ld\t%.2f%\n", n500 , 100 * ( 1.0 * n500 / Scaffold_Number ) );
- fprintf ( fo, "Contig>1K \t%ld\t%.2f%\n", n1k , 100 * ( 1.0 * n1k / Scaffold_Number ) );
- fprintf ( fo, "Contig>10K \t%ld\t%.2f%\n", n10k , 100 * ( 1.0 * n10k / Scaffold_Number ) );
- fprintf ( fo, "Contig>100K\t%ld\t%.2f%\n", n100k, 100 * ( 1.0 * n100k / Scaffold_Number ) );
- fprintf ( fo, "Contig>1M \t%ld\t%.2f%\n", n1m , 100 * ( 1.0 * n1m / Scaffold_Number ) );
- fprintf ( fo, "\n" );
- fprintf ( fo, "Nucleotide_A\t%lld\t%.2f%\n", A_num_all, 100 * ( 1.0 * A_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_C\t%lld\t%.2f%\n", C_num_all, 100 * ( 1.0 * C_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_G\t%lld\t%.2f%\n", G_num_all, 100 * ( 1.0 * G_num_all / Size_includeN ) );
- fprintf ( fo, "Nucleotide_T\t%lld\t%.2f%\n", T_num_all, 100 * ( 1.0 * T_num_all / Size_includeN ) );
- fprintf ( fo, "GapContent_N\t%lld\t%.2f%\n", N_num_all, 100 * ( 1.0 * N_num_all / Size_includeN ) );
- fprintf ( fo, "Non_ACGTN\t%lld\t%.2f%\n", Non_ACGTN_all, 100 * ( 1.0 * Non_ACGTN_all / Size_includeN ) );
- fprintf ( fo, "GC_Content\t%.2f%\t\t(G+C)/(A+C+G+T)\n", 100 * ( 1.0 * ( G_num_all + C_num_all ) / ( A_num_all + C_num_all + G_num_all + T_num_all ) ) );
- fprintf ( fo, "\n" );
-
- for ( k = 0; k < 10; k++ )
- { flag[k] = 0; }
-
- for ( k = Scaffold_Number - 1; k >= 0; k-- )
- {
- Sum = Sum + Size_Seq[k];
-
- if ( ( Sum >= Size_includeN * 0.1 ) && ( Sum < Size_includeN * 0.2 ) && ( flag[1] == 0 ) )
- {
- fprintf ( fo, "N10\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[1] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.2 ) && ( Sum < Size_includeN * 0.3 ) && ( flag[2] == 0 ) )
- {
- fprintf ( fo, "N20\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[2] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.3 ) && ( Sum < Size_includeN * 0.4 ) && ( flag[3] == 0 ) )
- {
- fprintf ( fo, "N30\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[3] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.4 ) && ( Sum < Size_includeN * 0.5 ) && ( flag[4] == 0 ) )
- {
- fprintf ( fo, "N40\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[4] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.5 ) && ( Sum < Size_includeN * 0.6 ) && ( flag[5] == 0 ) )
- {
- fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[5] = 1;
- N50 = Size_Seq[k];
- }
- else if ( ( Sum >= Size_includeN * 0.6 ) && ( Sum < Size_includeN * 0.7 ) && ( flag[6] == 0 ) )
- {
- fprintf ( fo, "N60\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[6] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.7 ) && ( Sum < Size_includeN * 0.8 ) && ( flag[7] == 0 ) )
- {
- fprintf ( fo, "N70\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[7] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.8 ) && ( Sum < Size_includeN * 0.9 ) && ( flag[8] == 0 ) )
- {
- fprintf ( fo, "N80\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[8] = 1;
- }
- else if ( ( Sum >= Size_includeN * 0.9 ) && ( flag[9] == 0 ) )
- {
- fprintf ( fo, "N90\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- flag[9] = 1;
- }
-
- if ( ( Sum >= known_genome_size * 0.5 ) && ( flag_known == 0 ) )
- {
- N50_known = Size_Seq[k];
- Num_N50_known = Scaffold_Number - k;
- flag_known = 1;
- }
- }
-
- if ( flag[5] == 0 )
- {
- Sum = 0;
-
- for ( k = Scaffold_Number - 1; k >= 0; k-- )
- {
- Sum = Sum + Size_Seq[k];
-
- if ( Sum >= Size_includeN * 0.5 )
- {
- fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
- break;
- }
- }
- }
-
- fprintf ( fo, "\n" );
-
- if ( known_genome_size )
- {
- fprintf ( fo, "NG50\t%ld\t%ld\n", N50_known, Num_N50_known );
- fprintf ( fo, "N50_contig-NG50_contig_length_difference\t%ld\n", abs ( N50 - N50_known ) );
- }
- else
- {
- fprintf ( fo, "NG50\tNaN\tNaN\n" );
- fprintf ( fo, "N50_contig-NG50_contig_length_difference\tNaN\n" );
- }
-
- fprintf ( fo, "\n" );
- free ( A_num );
- free ( C_num );
- free ( G_num );
- free ( T_num );
- free ( N_num );
- free ( Non_ACGTN );
- free ( Singleton_Seq );
- free ( Size_Seq );
- Scaffold_Number_Contig = Scaffold_Number;
- fprintf ( fo, "Number_of_contigs_in_scaffolds\t%ld\n", Scaffold_Number_Contig - Singleton_Number_Scaf );
- fprintf ( fo, "Number_of_contigs_not_in_scaffolds(Singleton)\t%ld\n", Singleton_Number_Scaf );
- fprintf ( fo, "Average_number_of_contigs_per_scaffold\t%.1f\n", 1.0 * ( Scaffold_Number_Contig - Singleton_Number_Scaf ) / ( Scaffold_Number_Scaf - Singleton_Number_Scaf ) );
- fprintf ( fo, "\n" );
- fclose ( fp );
- fclose ( fp2 );
- fclose ( fo );
+ FILE *fp, *fp2, *fo;
+ char line[1024];
+ sprintf ( line, "%s.scafSeq", graphfile );
+ fp = ckopen ( line, "r" );
+ sprintf ( line, "%s.contig", graphfile );
+ fp2 = ckopen ( line, "r" );
+ sprintf ( line, "%s.scafStatistics", graphfile );
+ fo = ckopen ( line, "w" );
+ fprintf ( fo, "<-- Information for assembly Scaffold '%s.scafSeq'.(cut_off_length < 100bp) -->\n\n", graphfile );
+ int cut_off_len = 0;
+ char Nucleotide;
+ char buf[4000];
+ long Scaffold_Number = 0;
+ long Scaffold_Number_Scaf = 0;
+ long Scaffold_Number_Contig = 0;
+ long Singleton_Number_Scaf = 0;
+ long Singleton_Number = 0;
+ long *Singleton_Seq = ( long * ) malloc ( sizeof ( long ) );
+ long long A_num_all = 0;
+ long *A_num = ( long * ) malloc ( sizeof ( long ) );
+ long long C_num_all = 0;
+ long *C_num = ( long * ) malloc ( sizeof ( long ) );
+ long long G_num_all = 0;
+ long *G_num = ( long * ) malloc ( sizeof ( long ) );
+ long long T_num_all = 0;
+ long *T_num = ( long * ) malloc ( sizeof ( long ) );
+ long long N_num_all = 0;
+ long *N_num = ( long * ) malloc ( sizeof ( long ) );
+ long long Non_ACGTN_all = 0;
+ long *Non_ACGTN = ( long * ) malloc ( sizeof ( long ) );
+ long long Size_includeN = 0;
+ long *Size_Seq = ( long * ) malloc ( sizeof ( long ) );
+ int k;
+ long long Sum = 0;
+ int flag[10];
+ int flag_known = 0;
+ long n100 = 0;
+ long n500 = 0;
+ long n1k = 0;
+ long n10k = 0;
+ long n100k = 0;
+ long n1m = 0;
+ long N50 = 0;
+ long N50_known = 0;
+ long Num_N50_known = 0;
+ cut_off_len = len_cut;
+ A_num[Scaffold_Number] = 0;
+ C_num[Scaffold_Number] = 0;
+ G_num[Scaffold_Number] = 0;
+ T_num[Scaffold_Number] = 0;
+ N_num[Scaffold_Number] = 0;
+ Non_ACGTN[Scaffold_Number] = 0;
+ Size_Seq[Scaffold_Number] = 0;
+ Singleton_Seq[Scaffold_Number] = 0;
+ Nucleotide = fgetc ( fp );
+
+ while ( Nucleotide != (char) EOF ) /* Bug Fix */
+ {
+ if ( Nucleotide == '>' )
+ {
+ if ( ( Scaffold_Number > 0 ) && ( Size_Seq[Scaffold_Number - 1] < cut_off_len ) )
+ {
+ A_num_all = A_num_all - A_num[Scaffold_Number - 1];
+ C_num_all = C_num_all - C_num[Scaffold_Number - 1];
+ G_num_all = G_num_all - G_num[Scaffold_Number - 1];
+ T_num_all = T_num_all - T_num[Scaffold_Number - 1];
+ N_num_all = N_num_all - N_num[Scaffold_Number - 1];
+ Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
+ Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
+ Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
+ Scaffold_Number = Scaffold_Number - 1;
+ }
+ else
+ {
+ Size_Seq = ( long * ) realloc ( Size_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ A_num = ( long * ) realloc ( A_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ C_num = ( long * ) realloc ( C_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ G_num = ( long * ) realloc ( G_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ T_num = ( long * ) realloc ( T_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ N_num = ( long * ) realloc ( N_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ Non_ACGTN = ( long * ) realloc ( Non_ACGTN, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ Singleton_Seq = ( long * ) realloc ( Singleton_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ }
+
+ Scaffold_Number++;
+ A_num[Scaffold_Number - 1] = 0;
+ C_num[Scaffold_Number - 1] = 0;
+ G_num[Scaffold_Number - 1] = 0;
+ T_num[Scaffold_Number - 1] = 0;
+ N_num[Scaffold_Number - 1] = 0;
+ Non_ACGTN[Scaffold_Number - 1] = 0;
+ Size_Seq[Scaffold_Number - 1] = 0;
+ Singleton_Seq[Scaffold_Number - 1] = 0;
+ Nucleotide = fgetc ( fp );
+
+ if ( Nucleotide == 'C' )
+ {
+ Singleton_Number++;
+ Singleton_Seq[Scaffold_Number - 1] ++;
+ }
+
+ fgets ( buf, 4000, fp );
+ }
+ else if ( ( Nucleotide == 'N' ) || ( Nucleotide == 'n' ) )
+ {
+ N_num[Scaffold_Number - 1] ++;
+ N_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'A' ) || ( Nucleotide == 'a' ) )
+ {
+ A_num[Scaffold_Number - 1] ++;
+ A_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'C' ) || ( Nucleotide == 'c' ) )
+ {
+ C_num[Scaffold_Number - 1] ++;
+ C_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'G' ) || ( Nucleotide == 'g' ) )
+ {
+ G_num[Scaffold_Number - 1] ++;
+ G_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'T' ) || ( Nucleotide == 't' ) )
+ {
+ T_num[Scaffold_Number - 1] ++;
+ T_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else
+ {
+ if ( ( Nucleotide != '\n' ) && ( Nucleotide != '\r' ) )
+ {
+ Non_ACGTN[Scaffold_Number - 1] ++;
+ Non_ACGTN_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ }
+
+ Nucleotide = fgetc ( fp );
+ }
+
+ if ( Size_Seq[Scaffold_Number - 1] < cut_off_len )
+ {
+ A_num_all = A_num_all - A_num[Scaffold_Number - 1];
+ C_num_all = C_num_all - C_num[Scaffold_Number - 1];
+ G_num_all = G_num_all - G_num[Scaffold_Number - 1];
+ T_num_all = T_num_all - T_num[Scaffold_Number - 1];
+ N_num_all = N_num_all - N_num[Scaffold_Number - 1];
+ Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
+ Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
+ Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
+ Scaffold_Number = Scaffold_Number - 1;
+ }
+
+ qsort ( Size_Seq, Scaffold_Number, sizeof ( Size_Seq[0] ), cmp_int );
+ fprintf ( fo, "Size_includeN\t%lld\n", Size_includeN );
+ fprintf ( fo, "Size_withoutN\t%lld\n", Size_includeN - N_num_all );
+ fprintf ( fo, "Scaffold_Num\t%ld\n", Scaffold_Number );
+ fprintf ( fo, "Mean_Size\t%lld\n", Size_includeN / Scaffold_Number );
+ fprintf ( fo, "Median_Size\t%ld\n", Size_Seq[ ( Scaffold_Number + 1 ) / 2 - 1] );
+ fprintf ( fo, "Longest_Seq\t%ld\n", Size_Seq[Scaffold_Number - 1] );
+ fprintf ( fo, "Shortest_Seq\t%ld\n", Size_Seq[0] );
+ fprintf ( fo, "Singleton_Num\t%ld\n", Singleton_Number );
+ fprintf ( fo, "Average_length_of_break(N)_in_scaffold\t%lld\n", N_num_all / Scaffold_Number );
+ fprintf ( fo, "\n" );
+
+ if ( known_genome_size )
+ {
+ fprintf ( fo, "Known_genome_size\t%ld\n", known_genome_size );
+ fprintf ( fo, "Total_scaffold_length_as_percentage_of_known_genome_size\t%.2f%\n", 100 * ( 1.0 * Size_includeN / known_genome_size ) );
+ }
+ else
+ {
+ fprintf ( fo, "Known_genome_size\tNaN\n" );
+ fprintf ( fo, "Total_scaffold_length_as_percentage_of_known_genome_size\tNaN\n" );
+ }
+
+ fprintf ( fo, "\n" );
+
+ for ( k = 0; k < Scaffold_Number; k++ )
+ {
+ if ( Size_Seq[k] > 100 )
+ {
+ n100++;
+ }
+
+ if ( Size_Seq[k] > 500 )
+ {
+ n500++;
+ }
+
+ if ( Size_Seq[k] > 1000 )
+ {
+ n1k++;
+ }
+
+ if ( Size_Seq[k] > 10000 )
+ {
+ n10k++;
+ }
+
+ if ( Size_Seq[k] > 100000 )
+ {
+ n100k++;
+ }
+
+ if ( Size_Seq[k] > 1000000 )
+ {
+ n1m++;
+ }
+ }
+
+ fprintf ( fo, "scaffolds>100 \t%ld\t%.2f%\n", n100 , 100 * ( 1.0 * n100 / Scaffold_Number ) );
+ fprintf ( fo, "scaffolds>500 \t%ld\t%.2f%\n", n500 , 100 * ( 1.0 * n500 / Scaffold_Number ) );
+ fprintf ( fo, "scaffolds>1K \t%ld\t%.2f%\n", n1k , 100 * ( 1.0 * n1k / Scaffold_Number ) );
+ fprintf ( fo, "scaffolds>10K \t%ld\t%.2f%\n", n10k , 100 * ( 1.0 * n10k / Scaffold_Number ) );
+ fprintf ( fo, "scaffolds>100K\t%ld\t%.2f%\n", n100k, 100 * ( 1.0 * n100k / Scaffold_Number ) );
+ fprintf ( fo, "scaffolds>1M \t%ld\t%.2f%\n", n1m , 100 * ( 1.0 * n1m / Scaffold_Number ) );
+ fprintf ( fo, "\n" );
+ fprintf ( fo, "Nucleotide_A\t%lld\t%.2f%\n", A_num_all, 100 * ( 1.0 * A_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_C\t%lld\t%.2f%\n", C_num_all, 100 * ( 1.0 * C_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_G\t%lld\t%.2f%\n", G_num_all, 100 * ( 1.0 * G_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_T\t%lld\t%.2f%\n", T_num_all, 100 * ( 1.0 * T_num_all / Size_includeN ) );
+ fprintf ( fo, "GapContent_N\t%lld\t%.2f%\n", N_num_all, 100 * ( 1.0 * N_num_all / Size_includeN ) );
+ fprintf ( fo, "Non_ACGTN\t%lld\t%.2f%\n", Non_ACGTN_all, 100 * ( 1.0 * Non_ACGTN_all / Size_includeN ) );
+ fprintf ( fo, "GC_Content\t%.2f%\t\t(G+C)/(A+C+G+T)\n", 100 * ( 1.0 * ( G_num_all + C_num_all ) / ( A_num_all + C_num_all + G_num_all + T_num_all ) ) );
+ fprintf ( fo, "\n" );
+
+ for ( k = 0; k < 10; k++ )
+ {
+ flag[k] = 0;
+ }
+
+ for ( k = Scaffold_Number - 1; k >= 0; k-- )
+ {
+ Sum = Sum + Size_Seq[k];
+
+ if ( ( Sum >= Size_includeN * 0.1 ) && ( Sum < Size_includeN * 0.2 ) && ( flag[1] == 0 ) )
+ {
+ fprintf ( fo, "N10\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[1] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.2 ) && ( Sum < Size_includeN * 0.3 ) && ( flag[2] == 0 ) )
+ {
+ fprintf ( fo, "N20\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[2] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.3 ) && ( Sum < Size_includeN * 0.4 ) && ( flag[3] == 0 ) )
+ {
+ fprintf ( fo, "N30\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[3] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.4 ) && ( Sum < Size_includeN * 0.5 ) && ( flag[4] == 0 ) )
+ {
+ fprintf ( fo, "N40\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[4] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.5 ) && ( Sum < Size_includeN * 0.6 ) && ( flag[5] == 0 ) )
+ {
+ fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[5] = 1;
+ N50 = Size_Seq[k];
+ }
+ else if ( ( Sum >= Size_includeN * 0.6 ) && ( Sum < Size_includeN * 0.7 ) && ( flag[6] == 0 ) )
+ {
+ fprintf ( fo, "N60\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[6] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.7 ) && ( Sum < Size_includeN * 0.8 ) && ( flag[7] == 0 ) )
+ {
+ fprintf ( fo, "N70\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[7] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.8 ) && ( Sum < Size_includeN * 0.9 ) && ( flag[8] == 0 ) )
+ {
+ fprintf ( fo, "N80\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[8] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.9 ) && ( flag[9] == 0 ) )
+ {
+ fprintf ( fo, "N90\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[9] = 1;
+ }
+
+ if ( ( Sum >= known_genome_size * 0.5 ) && ( flag_known == 0 ) )
+ {
+ N50_known = Size_Seq[k];
+ Num_N50_known = Scaffold_Number - k;
+ flag_known = 1;
+ }
+ }
+
+ if ( flag[5] == 0 )
+ {
+ Sum = 0;
+
+ for ( k = Scaffold_Number - 1; k >= 0; k-- )
+ {
+ Sum = Sum + Size_Seq[k];
+
+ if ( Sum >= Size_includeN * 0.5 )
+ {
+ fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ break;
+ }
+ }
+ }
+
+ fprintf ( fo, "\n" );
+
+ if ( known_genome_size )
+ {
+ fprintf ( fo, "NG50\t%ld\t%ld\n", N50_known, Num_N50_known );
+ fprintf ( fo, "N50_scaffold-NG50_scaffold_length_difference\t%ld\n", abs ( N50 - N50_known ) );
+ }
+ else
+ {
+ fprintf ( fo, "NG50\tNaN\tNaN\n" );
+ fprintf ( fo, "N50_scaffold-NG50_scaffold_length_difference\tNaN\n" );
+ }
+
+ fprintf ( fo, "\n" );
+ free ( A_num );
+ free ( C_num );
+ free ( G_num );
+ free ( T_num );
+ free ( N_num );
+ free ( Non_ACGTN );
+ free ( Singleton_Seq );
+ free ( Size_Seq );
+ Scaffold_Number_Scaf = Scaffold_Number;
+ Singleton_Number_Scaf = Singleton_Number;
+ /*********************** Contig ******************************/
+ fprintf ( fo, "<-- Information for assembly Contig '%s.contig'.(cut_off_length < 100bp) -->\n\n", graphfile );
+ cut_off_len = 0;
+ Scaffold_Number = 0;
+ Singleton_Number = 0;
+ Singleton_Seq = ( long * ) malloc ( sizeof ( long ) );
+ A_num_all = 0;
+ A_num = ( long * ) malloc ( sizeof ( long ) );
+ C_num_all = 0;
+ C_num = ( long * ) malloc ( sizeof ( long ) );
+ G_num_all = 0;
+ G_num = ( long * ) malloc ( sizeof ( long ) );
+ T_num_all = 0;
+ T_num = ( long * ) malloc ( sizeof ( long ) );
+ N_num_all = 0;
+ N_num = ( long * ) malloc ( sizeof ( long ) );
+ Non_ACGTN_all = 0;
+ Non_ACGTN = ( long * ) malloc ( sizeof ( long ) );
+ Size_includeN = 0;
+ Size_Seq = ( long * ) malloc ( sizeof ( long ) );
+ Sum = 0;
+ n100 = 0;
+ n500 = 0;
+ n1k = 0;
+ n10k = 0;
+ n100k = 0;
+ n1m = 0;
+ N50 = 0;
+ N50_known = 0;
+ Num_N50_known = 0;
+ flag_known = 0;
+ cut_off_len = len_cut;
+ A_num[Scaffold_Number] = 0;
+ C_num[Scaffold_Number] = 0;
+ G_num[Scaffold_Number] = 0;
+ T_num[Scaffold_Number] = 0;
+ N_num[Scaffold_Number] = 0;
+ Non_ACGTN[Scaffold_Number] = 0;
+ Size_Seq[Scaffold_Number] = 0;
+ Singleton_Seq[Scaffold_Number] = 0;
+ Nucleotide = fgetc ( fp2 );
+
+ while ( Nucleotide != (char) EOF ) /* Bug Fix */
+ {
+ if ( Nucleotide == '>' )
+ {
+ if ( ( Scaffold_Number > 0 ) && ( Size_Seq[Scaffold_Number - 1] < cut_off_len ) )
+ {
+ A_num_all = A_num_all - A_num[Scaffold_Number - 1];
+ C_num_all = C_num_all - C_num[Scaffold_Number - 1];
+ G_num_all = G_num_all - G_num[Scaffold_Number - 1];
+ T_num_all = T_num_all - T_num[Scaffold_Number - 1];
+ N_num_all = N_num_all - N_num[Scaffold_Number - 1];
+ Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
+ Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
+ Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
+ Scaffold_Number = Scaffold_Number - 1;
+ }
+ else
+ {
+ Size_Seq = ( long * ) realloc ( Size_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ A_num = ( long * ) realloc ( A_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ C_num = ( long * ) realloc ( C_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ G_num = ( long * ) realloc ( G_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ T_num = ( long * ) realloc ( T_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ N_num = ( long * ) realloc ( N_num, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ Non_ACGTN = ( long * ) realloc ( Non_ACGTN, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ Singleton_Seq = ( long * ) realloc ( Singleton_Seq, ( Scaffold_Number + 2 ) * sizeof ( long ) );
+ }
+
+ Scaffold_Number++;
+ A_num[Scaffold_Number - 1] = 0;
+ C_num[Scaffold_Number - 1] = 0;
+ G_num[Scaffold_Number - 1] = 0;
+ T_num[Scaffold_Number - 1] = 0;
+ N_num[Scaffold_Number - 1] = 0;
+ Non_ACGTN[Scaffold_Number - 1] = 0;
+ Size_Seq[Scaffold_Number - 1] = 0;
+ Singleton_Seq[Scaffold_Number - 1] = 0;
+ Nucleotide = fgetc ( fp2 );
+
+ if ( Nucleotide == 'C' )
+ {
+ Singleton_Number++;
+ Singleton_Seq[Scaffold_Number - 1] ++;
+ }
+
+ fgets ( buf, 4000, fp2 );
+ }
+ else if ( ( Nucleotide == 'N' ) || ( Nucleotide == 'n' ) )
+ {
+ N_num[Scaffold_Number - 1] ++;
+ N_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'A' ) || ( Nucleotide == 'a' ) )
+ {
+ A_num[Scaffold_Number - 1] ++;
+ A_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'C' ) || ( Nucleotide == 'c' ) )
+ {
+ C_num[Scaffold_Number - 1] ++;
+ C_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'G' ) || ( Nucleotide == 'g' ) )
+ {
+ G_num[Scaffold_Number - 1] ++;
+ G_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else if ( ( Nucleotide == 'T' ) || ( Nucleotide == 't' ) )
+ {
+ T_num[Scaffold_Number - 1] ++;
+ T_num_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ else
+ {
+ if ( ( Nucleotide != '\n' ) && ( Nucleotide != '\r' ) )
+ {
+ Non_ACGTN[Scaffold_Number - 1] ++;
+ Non_ACGTN_all++;
+ Size_Seq[Scaffold_Number - 1] ++;
+ Size_includeN++;
+ }
+ }
+
+ Nucleotide = fgetc ( fp2 );
+ }
+
+ if ( Size_Seq[Scaffold_Number - 1] < cut_off_len )
+ {
+ A_num_all = A_num_all - A_num[Scaffold_Number - 1];
+ C_num_all = C_num_all - C_num[Scaffold_Number - 1];
+ G_num_all = G_num_all - G_num[Scaffold_Number - 1];
+ T_num_all = T_num_all - T_num[Scaffold_Number - 1];
+ N_num_all = N_num_all - N_num[Scaffold_Number - 1];
+ Non_ACGTN_all = Non_ACGTN_all - Non_ACGTN[Scaffold_Number - 1];
+ Size_includeN = Size_includeN - Size_Seq[Scaffold_Number - 1];
+ Singleton_Number = Singleton_Number - Singleton_Seq[Scaffold_Number - 1];
+ Scaffold_Number = Scaffold_Number - 1;
+ }
+
+ qsort ( Size_Seq, Scaffold_Number, sizeof ( Size_Seq[0] ), cmp_int );
+ fprintf ( fo, "Size_includeN\t%lld\n", Size_includeN );
+ fprintf ( fo, "Size_withoutN\t%lld\n", Size_includeN - N_num_all );
+ fprintf ( fo, "Contig_Num\t%ld\n", Scaffold_Number );
+ fprintf ( fo, "Mean_Size\t%lld\n", Size_includeN / Scaffold_Number );
+ fprintf ( fo, "Median_Size\t%ld\n", Size_Seq[ ( Scaffold_Number + 1 ) / 2 - 1] );
+ fprintf ( fo, "Longest_Seq\t%ld\n", Size_Seq[Scaffold_Number - 1] );
+ fprintf ( fo, "Shortest_Seq\t%ld\n", Size_Seq[0] );
+ fprintf ( fo, "\n" );
+
+ for ( k = 0; k < Scaffold_Number; k++ )
+ {
+ if ( Size_Seq[k] > 100 )
+ {
+ n100++;
+ }
+
+ if ( Size_Seq[k] > 500 )
+ {
+ n500++;
+ }
+
+ if ( Size_Seq[k] > 1000 )
+ {
+ n1k++;
+ }
+
+ if ( Size_Seq[k] > 10000 )
+ {
+ n10k++;
+ }
+
+ if ( Size_Seq[k] > 100000 )
+ {
+ n100k++;
+ }
+
+ if ( Size_Seq[k] > 1000000 )
+ {
+ n1m++;
+ }
+ }
+
+ fprintf ( fo, "Contig>100 \t%ld\t%.2f%\n", n100 , 100 * ( 1.0 * n100 / Scaffold_Number ) );
+ fprintf ( fo, "Contig>500 \t%ld\t%.2f%\n", n500 , 100 * ( 1.0 * n500 / Scaffold_Number ) );
+ fprintf ( fo, "Contig>1K \t%ld\t%.2f%\n", n1k , 100 * ( 1.0 * n1k / Scaffold_Number ) );
+ fprintf ( fo, "Contig>10K \t%ld\t%.2f%\n", n10k , 100 * ( 1.0 * n10k / Scaffold_Number ) );
+ fprintf ( fo, "Contig>100K\t%ld\t%.2f%\n", n100k, 100 * ( 1.0 * n100k / Scaffold_Number ) );
+ fprintf ( fo, "Contig>1M \t%ld\t%.2f%\n", n1m , 100 * ( 1.0 * n1m / Scaffold_Number ) );
+ fprintf ( fo, "\n" );
+ fprintf ( fo, "Nucleotide_A\t%lld\t%.2f%\n", A_num_all, 100 * ( 1.0 * A_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_C\t%lld\t%.2f%\n", C_num_all, 100 * ( 1.0 * C_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_G\t%lld\t%.2f%\n", G_num_all, 100 * ( 1.0 * G_num_all / Size_includeN ) );
+ fprintf ( fo, "Nucleotide_T\t%lld\t%.2f%\n", T_num_all, 100 * ( 1.0 * T_num_all / Size_includeN ) );
+ fprintf ( fo, "GapContent_N\t%lld\t%.2f%\n", N_num_all, 100 * ( 1.0 * N_num_all / Size_includeN ) );
+ fprintf ( fo, "Non_ACGTN\t%lld\t%.2f%\n", Non_ACGTN_all, 100 * ( 1.0 * Non_ACGTN_all / Size_includeN ) );
+ fprintf ( fo, "GC_Content\t%.2f%\t\t(G+C)/(A+C+G+T)\n", 100 * ( 1.0 * ( G_num_all + C_num_all ) / ( A_num_all + C_num_all + G_num_all + T_num_all ) ) );
+ fprintf ( fo, "\n" );
+
+ for ( k = 0; k < 10; k++ )
+ {
+ flag[k] = 0;
+ }
+
+ for ( k = Scaffold_Number - 1; k >= 0; k-- )
+ {
+ Sum = Sum + Size_Seq[k];
+
+ if ( ( Sum >= Size_includeN * 0.1 ) && ( Sum < Size_includeN * 0.2 ) && ( flag[1] == 0 ) )
+ {
+ fprintf ( fo, "N10\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[1] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.2 ) && ( Sum < Size_includeN * 0.3 ) && ( flag[2] == 0 ) )
+ {
+ fprintf ( fo, "N20\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[2] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.3 ) && ( Sum < Size_includeN * 0.4 ) && ( flag[3] == 0 ) )
+ {
+ fprintf ( fo, "N30\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[3] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.4 ) && ( Sum < Size_includeN * 0.5 ) && ( flag[4] == 0 ) )
+ {
+ fprintf ( fo, "N40\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[4] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.5 ) && ( Sum < Size_includeN * 0.6 ) && ( flag[5] == 0 ) )
+ {
+ fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[5] = 1;
+ N50 = Size_Seq[k];
+ }
+ else if ( ( Sum >= Size_includeN * 0.6 ) && ( Sum < Size_includeN * 0.7 ) && ( flag[6] == 0 ) )
+ {
+ fprintf ( fo, "N60\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[6] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.7 ) && ( Sum < Size_includeN * 0.8 ) && ( flag[7] == 0 ) )
+ {
+ fprintf ( fo, "N70\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[7] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.8 ) && ( Sum < Size_includeN * 0.9 ) && ( flag[8] == 0 ) )
+ {
+ fprintf ( fo, "N80\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[8] = 1;
+ }
+ else if ( ( Sum >= Size_includeN * 0.9 ) && ( flag[9] == 0 ) )
+ {
+ fprintf ( fo, "N90\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ flag[9] = 1;
+ }
+
+ if ( ( Sum >= known_genome_size * 0.5 ) && ( flag_known == 0 ) )
+ {
+ N50_known = Size_Seq[k];
+ Num_N50_known = Scaffold_Number - k;
+ flag_known = 1;
+ }
+ }
+
+ if ( flag[5] == 0 )
+ {
+ Sum = 0;
+
+ for ( k = Scaffold_Number - 1; k >= 0; k-- )
+ {
+ Sum = Sum + Size_Seq[k];
+
+ if ( Sum >= Size_includeN * 0.5 )
+ {
+ fprintf ( fo, "N50\t%ld\t%ld\n", Size_Seq[k], Scaffold_Number - k );
+ break;
+ }
+ }
+ }
+
+ fprintf ( fo, "\n" );
+
+ if ( known_genome_size )
+ {
+ fprintf ( fo, "NG50\t%ld\t%ld\n", N50_known, Num_N50_known );
+ fprintf ( fo, "N50_contig-NG50_contig_length_difference\t%ld\n", abs ( N50 - N50_known ) );
+ }
+ else
+ {
+ fprintf ( fo, "NG50\tNaN\tNaN\n" );
+ fprintf ( fo, "N50_contig-NG50_contig_length_difference\tNaN\n" );
+ }
+
+ fprintf ( fo, "\n" );
+ free ( A_num );
+ free ( C_num );
+ free ( G_num );
+ free ( T_num );
+ free ( N_num );
+ free ( Non_ACGTN );
+ free ( Singleton_Seq );
+ free ( Size_Seq );
+ Scaffold_Number_Contig = Scaffold_Number;
+ fprintf ( fo, "Number_of_contigs_in_scaffolds\t%ld\n", Scaffold_Number_Contig - Singleton_Number_Scaf );
+ fprintf ( fo, "Number_of_contigs_not_in_scaffolds(Singleton)\t%ld\n", Singleton_Number_Scaf );
+ fprintf ( fo, "Average_number_of_contigs_per_scaffold\t%.1f\n", 1.0 * ( Scaffold_Number_Contig - Singleton_Number_Scaf ) / ( Scaffold_Number_Scaf - Singleton_Number_Scaf ) );
+ fprintf ( fo, "\n" );
+ fclose ( fp );
+ fclose ( fp2 );
+ fclose ( fo );
}
/*************************************************
@@ -3820,20 +4106,22 @@ Output:
Return:
Connection between two contigs.
*************************************************/
-CONNECT * getCnt ( unsigned int from_c, unsigned int to_c )
+CONNECT *getCnt ( unsigned int from_c, unsigned int to_c )
{
- CONNECT * pcnt;
- pcnt = contig_array[from_c].downwardConnect;
+ CONNECT *pcnt;
+ pcnt = contig_array[from_c].downwardConnect;
- while ( pcnt )
- {
- if ( pcnt->contigID == to_c )
- { return pcnt; }
+ while ( pcnt )
+ {
+ if ( pcnt->contigID == to_c )
+ {
+ return pcnt;
+ }
- pcnt = pcnt->next;
- }
+ pcnt = pcnt->next;
+ }
- return pcnt;
+ return pcnt;
}
/*************************************************
@@ -3852,26 +4140,30 @@ Return:
scaffold.
Contig's downstream connection number otherwise.
*************************************************/
-int allConnect ( unsigned int ctg, CONNECT * preCNT )
+int allConnect ( unsigned int ctg, CONNECT *preCNT )
{
- if ( preCNT && preCNT->nextInScaf )
- { return 1; }
+ if ( preCNT && preCNT->nextInScaf )
+ {
+ return 1;
+ }
- CONNECT * cn_temp;
- int count = 0;
+ CONNECT *cn_temp;
+ int count = 0;
- if ( !contig_array[ctg].downwardConnect )
- { return count; }
+ if ( !contig_array[ctg].downwardConnect )
+ {
+ return count;
+ }
- cn_temp = contig_array[ctg].downwardConnect;
+ cn_temp = contig_array[ctg].downwardConnect;
- while ( cn_temp )
- {
- count++;
- cn_temp = cn_temp->next;
- }
+ while ( cn_temp )
+ {
+ count++;
+ cn_temp = cn_temp->next;
+ }
- return count;
+ return count;
}
/*************************************************
@@ -3890,105 +4182,137 @@ Return:
*************************************************/
int get_ctg_score2 ( int pos, int tempCounter )
{
- int id, innum, outnum, in = 0, out = 0, i, currId;
- CONNECT * dh_cnt;
- id = * ( unsigned int * ) darrayGet ( tempArray, pos );
- outnum = allConnect ( id, NULL );
- innum = allConnect ( getTwinCtg ( id ), NULL );
- int outlen = 0, inlen = 0, outcut, incut;
-
- if ( contig_array[id].downwardConnect )
- { outlen = contig_array[id].downwardConnect->gapLen; }
-
- if ( contig_array[getTwinCtg ( id )].downwardConnect )
- { inlen = contig_array[getTwinCtg ( id )].downwardConnect->gapLen; }
-
- outcut = outlen / overlaplen;
- incut = inlen / overlaplen;
-
- if ( outcut > outnum * 10 || outcut < outnum * 2 )
- { outcut = outnum * 10; }
-
- if ( incut > innum * 10 || incut < innum * 2 )
- { incut = innum * 10; }
-
- int start = pos - incut > 0 ? pos - incut : 0;
- int end = pos + outcut < tempCounter ? pos + outcut : tempCounter;
-
- for ( i = start; i < end; i++ )
- {
- if ( i == pos )
- { continue; }
-
- currId = * ( unsigned int * ) darrayGet ( tempArray, i );
-
- if ( i < pos )
- {
- dh_cnt = getCnt ( currId, id );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { in++; }
- }
-
- if ( i > pos )
- {
- dh_cnt = getCnt ( id, currId );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { out++; }
- }
- }
-
- if ( innum > pos )
- { innum = pos; }
-
- if ( outnum > tempCounter - pos - 1 )
- { outnum = tempCounter - pos - 1; }
-
- if ( innum > 0 && outnum > 0 )
- {
- if ( pos < tempCounter - 1 && pos > 0 )
- {
- if ( outnum < 3 && out == 0 )
- {
- if ( in > 0 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
- }
-
- if ( innum < 3 && in == 0 )
- {
- if ( out > 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
- }
-
- if ( in == 0 || out == 0 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 );
- }
-
- if ( pos == 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
-
- if ( pos == tempCounter - 1 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
- }
- else if ( innum > 0 )
- {
- if ( pos == tempCounter - 1 && in == 1 && innum > 5 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
- }
- else if ( outnum > 0 )
- {
- if ( pos == 0 && out == 1 && outnum > 5 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
- }
-
- return 0;
+ int id, innum, outnum, in = 0, out = 0, i, currId;
+ CONNECT *dh_cnt;
+ id = * ( unsigned int * ) darrayGet ( tempArray, pos );
+ outnum = allConnect ( id, NULL );
+ innum = allConnect ( getTwinCtg ( id ), NULL );
+ int outlen = 0, inlen = 0, outcut, incut;
+
+ if ( contig_array[id].downwardConnect )
+ {
+ outlen = contig_array[id].downwardConnect->gapLen;
+ }
+
+ if ( contig_array[getTwinCtg ( id )].downwardConnect )
+ {
+ inlen = contig_array[getTwinCtg ( id )].downwardConnect->gapLen;
+ }
+
+ outcut = outlen / overlaplen;
+ incut = inlen / overlaplen;
+
+ if ( outcut > outnum * 10 || outcut < outnum * 2 )
+ {
+ outcut = outnum * 10;
+ }
+
+ if ( incut > innum * 10 || incut < innum * 2 )
+ {
+ incut = innum * 10;
+ }
+
+ int start = pos - incut > 0 ? pos - incut : 0;
+ int end = pos + outcut < tempCounter ? pos + outcut : tempCounter;
+
+ for ( i = start; i < end; i++ )
+ {
+ if ( i == pos )
+ {
+ continue;
+ }
+
+ currId = * ( unsigned int * ) darrayGet ( tempArray, i );
+
+ if ( i < pos )
+ {
+ dh_cnt = getCnt ( currId, id );
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ in++;
+ }
+ }
+
+ if ( i > pos )
+ {
+ dh_cnt = getCnt ( id, currId );
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ out++;
+ }
+ }
+ }
+
+ if ( innum > pos )
+ {
+ innum = pos;
+ }
+
+ if ( outnum > tempCounter - pos - 1 )
+ {
+ outnum = tempCounter - pos - 1;
+ }
+
+ if ( innum > 0 && outnum > 0 )
+ {
+ if ( pos < tempCounter - 1 && pos > 0 )
+ {
+ if ( outnum < 3 && out == 0 )
+ {
+ if ( in > 0 )
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ }
+
+ if ( innum < 3 && in == 0 )
+ {
+ if ( out > 0 )
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+ }
+
+ if ( in == 0 || out == 0 )
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 );
+ }
+
+ if ( pos == 0 )
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ if ( pos == tempCounter - 1 )
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ }
+ else if ( innum > 0 )
+ {
+ if ( pos == tempCounter - 1 && in == 1 && innum > 5 )
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ else if ( outnum > 0 )
+ {
+ if ( pos == 0 && out == 1 && outnum > 5 )
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ return 0;
}
/*************************************************
@@ -4007,198 +4331,246 @@ Return:
*************************************************/
int get_ctg_score ( int pos, int num3, int num5, int flag )
{
- int i = 0, in = 0, out = 0, innum = 0, outnum = 0, threeid;
- CONNECT * dh_cnt;
- int id;
-
- if ( flag == 0 )
- {
- id = * ( unsigned int * ) darrayGet ( scaf3, pos );
- int end = pos > 100 ? pos - 100 : 0;
- int start = pos + 100 < num3 ? pos + 100 : num3;
- in = 0, out = 0;
-
- for ( i = start; i >= end; i-- )
- {
- threeid = * ( unsigned int * ) darrayGet ( scaf3, i );
-
- if ( threeid == id )
- {
- pos = i;
- continue;
- }
-
- dh_cnt = getCnt ( id, threeid );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- {
- out++;
- }
-
- dh_cnt = getCnt ( threeid, id );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { in++; }
- }
-
- outnum = allConnect ( id, NULL );
- innum = allConnect ( getTwinCtg ( id ), NULL );
- int num5_check = 0;
-
- if ( pos - end < outnum )
- {
- for ( i = 0; i < num5; i++ )
- {
- num5_check++;
- threeid = * ( unsigned int * ) darrayGet ( scaf5, i );
- dh_cnt = getCnt ( id, threeid );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { out++; }
-
- if ( num5_check == outnum )
- { break; }
- }
- }
-
- if ( pos - end + num5_check < outnum )
- { outnum = pos - end + num5_check; }
-
- if ( start - pos < innum )
- { innum = start - pos; }
-
- if ( innum > 0 && outnum > 0 )
- {
- if ( pos != num3 && pos > 0 )
- {
- if ( outnum < 5 && out == 0 )
- {
- if ( innum > 0 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
- }
-
- if ( innum < 5 && in == 0 )
- {
- if ( outnum > 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
- }
-
- if ( in == 0 || out == 0 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 );
- }
-
- if ( pos == num3 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
-
- if ( pos == 0 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
- }
- else if ( innum > 0 )
- {
- return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
- }
- else if ( outnum > 0 )
- {
- if ( pos == num3 && out == 1 && outnum > 5 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
- }
-
- return 0;
- }
- else
- {
- id = * ( unsigned int * ) darrayGet ( scaf5, pos );
- int start = pos > 100 ? pos - 100 : 0;
- int end = pos + 100 < num5 ? pos + 100 : num5;
- in = 0, out = 0;
-
- for ( i = start; i < end; i++ )
- {
- threeid = * ( unsigned int * ) darrayGet ( scaf5, i );
-
- if ( threeid == id )
- {
- pos = i;
- continue;
- }
-
- dh_cnt = getCnt ( id, threeid );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { out++; }
-
- dh_cnt = getCnt ( threeid, id );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { in++; }
- }
-
- outnum = allConnect ( id, NULL );
- innum = allConnect ( getTwinCtg ( id ), NULL );
- int num3_check = 0;
-
- if ( pos - start < innum )
- {
- for ( i = 0; i < num3; i++ )
- {
- num3_check++;
- threeid = * ( unsigned int * ) darrayGet ( scaf3, i );
- dh_cnt = getCnt ( threeid, id );
-
- if ( dh_cnt && dh_cnt->weight > 0 )
- { in++; }
-
- if ( num3_check == innum )
- { break; }
- }
- }
-
- if ( pos - start + num3_check < innum )
- { innum = pos - start + num3_check; }
-
- if ( end - pos - 1 < outnum )
- { outnum = end - pos - 1; }
-
- if ( innum > 0 && outnum > 0 )
- {
- if ( pos != num5 - 1 && pos > 0 )
- {
- if ( outnum < 5 && out == 0 && innum > 0 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
-
- if ( innum < 5 && in == 0 && outnum > 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
-
- if ( in == 0 || out == 0 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 );
- }
-
- if ( pos == 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
-
- if ( pos == num5 - 1 )
- { return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); }
- }
- else if ( innum > 0 )
- {
- if ( pos == num5 - 1 && in == 1 && innum > 5 )
- { return 0; }
-
- return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
- }
- else if ( outnum > 0 )
- { return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 ); }
-
- return 0;
- }
-
- return 0;
+ int i = 0, in = 0, out = 0, innum = 0, outnum = 0, threeid; /* in/out: positive-weight links found among the scanned neighbours; innum/outnum: the totals they are divided by */
+ CONNECT *dh_cnt;
+ int id; /* contig id stored at index pos of the selected array */
+
+ if ( flag == 0 ) /* flag 0: pos indexes scaf3; any other value: pos indexes scaf5 */
+ {
+ id = * ( unsigned int * ) darrayGet ( scaf3, pos );
+ int end = pos > 100 ? pos - 100 : 0; /* lowest index scanned: at most 100 slots below pos */
+ int start = pos + 100 < num3 ? pos + 100 : num3; /* highest index scanned, clamped to num3; NOTE(review): the loop below reads index start inclusively, so index num3 can be read -- confirm scaf3 holds a valid entry there */
+ in = 0, out = 0;
+
+ for ( i = start; i >= end; i-- ) /* walk the window from the high index down to the low one */
+ {
+ threeid = * ( unsigned int * ) darrayGet ( scaf3, i );
+
+ if ( threeid == id ) /* the entry is the contig itself, not a neighbour */
+ {
+ pos = i; /* re-anchor pos on the slot holding id; since i decreases, the lowest such slot in the window wins */
+ continue;
+ }
+
+ dh_cnt = getCnt ( id, threeid ); /* connection looked up from id to the neighbour */
+
+ if ( dh_cnt && dh_cnt->weight > 0 ) /* only connections with positive weight are counted */
+ {
+ out++;
+ }
+
+ dh_cnt = getCnt ( threeid, id ); /* connection looked up from the neighbour to id */
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ in++;
+ }
+ }
+
+ outnum = allConnect ( id, NULL ); /* presumably the number of connections leaving id -- confirm against allConnect */
+ innum = allConnect ( getTwinCtg ( id ), NULL ); /* same query on the twin contig, used here as the incoming total */
+ int num5_check = 0; /* number of scaf5 entries examined below */
+
+ if ( pos - end < outnum ) /* fewer window slots below pos than outnum: also look into scaf5 */
+ {
+ for ( i = 0; i < num5; i++ ) /* scan scaf5 from its first entry */
+ {
+ num5_check++;
+ threeid = * ( unsigned int * ) darrayGet ( scaf5, i );
+ dh_cnt = getCnt ( id, threeid );
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ out++;
+ }
+
+ if ( num5_check == outnum ) /* stop once outnum scaf5 entries have been examined */
+ {
+ break;
+ }
+ }
+ }
+
+ if ( pos - end + num5_check < outnum ) /* cap outnum at the slots below pos plus the scaf5 entries examined */
+ {
+ outnum = pos - end + num5_check;
+ }
+
+ if ( start - pos < innum ) /* cap innum at the number of window slots above pos */
+ {
+ innum = start - pos;
+ }
+
+ if ( innum > 0 && outnum > 0 ) /* both totals are positive */
+ {
+ if ( pos != num3 && pos > 0 ) /* pos is neither 0 nor num3 */
+ {
+ if ( outnum < 5 && out == 0 ) /* small outgoing total and nothing found: score on the incoming side only */
+ {
+ if ( innum > 0 ) /* always true here: the enclosing branch already requires innum > 0 */
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); /* found over total as a percentage, truncated to int */
+ }
+ }
+
+ if ( innum < 5 && in == 0 ) /* mirror case: score on the outgoing side only */
+ {
+ if ( outnum > 0 ) /* always true here as well */
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+ }
+
+ if ( in == 0 || out == 0 ) /* one side has no counted connection at all */
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 ); /* both sides combined: product of found counts over product of totals, as a percentage */
+ }
+
+ if ( pos == num3 ) /* at index num3 only the outgoing ratio is used */
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ if ( pos == 0 ) /* at index 0 only the incoming ratio is used */
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ }
+ else if ( innum > 0 ) /* only the incoming total is positive */
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ else if ( outnum > 0 ) /* only the outgoing total is positive */
+ {
+ if ( pos == num3 && out == 1 && outnum > 5 ) /* a single counted connection against a total above 5 scores 0 */
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ return 0; /* both totals are zero, or no case above returned */
+ }
+ else
+ {
+ id = * ( unsigned int * ) darrayGet ( scaf5, pos );
+ int start = pos > 100 ? pos - 100 : 0; /* first index scanned: at most 100 slots below pos */
+ int end = pos + 100 < num5 ? pos + 100 : num5; /* scan limit, exclusive in the loop below, clamped to num5 */
+ in = 0, out = 0;
+
+ for ( i = start; i < end; i++ ) /* walk the window from the low index up */
+ {
+ threeid = * ( unsigned int * ) darrayGet ( scaf5, i );
+
+ if ( threeid == id ) /* the entry is the contig itself, not a neighbour */
+ {
+ pos = i; /* re-anchor pos on the slot holding id; since i increases, the highest such slot in the window wins */
+ continue;
+ }
+
+ dh_cnt = getCnt ( id, threeid ); /* connection looked up from id to the neighbour */
+
+ if ( dh_cnt && dh_cnt->weight > 0 ) /* only connections with positive weight are counted */
+ {
+ out++;
+ }
+
+ dh_cnt = getCnt ( threeid, id ); /* connection looked up from the neighbour to id */
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ in++;
+ }
+ }
+
+ outnum = allConnect ( id, NULL ); /* presumably the number of connections leaving id -- confirm against allConnect */
+ innum = allConnect ( getTwinCtg ( id ), NULL ); /* same query on the twin contig, used here as the incoming total */
+ int num3_check = 0; /* number of scaf3 entries examined below */
+
+ if ( pos - start < innum ) /* fewer window slots below pos than innum: also look into scaf3 */
+ {
+ for ( i = 0; i < num3; i++ ) /* scan scaf3 from its first entry */
+ {
+ num3_check++;
+ threeid = * ( unsigned int * ) darrayGet ( scaf3, i );
+ dh_cnt = getCnt ( threeid, id );
+
+ if ( dh_cnt && dh_cnt->weight > 0 )
+ {
+ in++;
+ }
+
+ if ( num3_check == innum ) /* stop once innum scaf3 entries have been examined */
+ {
+ break;
+ }
+ }
+ }
+
+ if ( pos - start + num3_check < innum ) /* cap innum at the slots below pos plus the scaf3 entries examined */
+ {
+ innum = pos - start + num3_check;
+ }
+
+ if ( end - pos - 1 < outnum ) /* cap outnum at the number of window slots above pos */
+ {
+ outnum = end - pos - 1;
+ }
+
+ if ( innum > 0 && outnum > 0 ) /* both totals are positive */
+ {
+ if ( pos != num5 - 1 && pos > 0 ) /* pos is neither 0 nor num5 - 1 */
+ {
+ if ( outnum < 5 && out == 0 && innum > 0 ) /* small outgoing total and nothing found: score on the incoming side only */
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 ); /* found over total as a percentage, truncated to int */
+ }
+
+ if ( innum < 5 && in == 0 && outnum > 0 ) /* mirror case: score on the outgoing side only */
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ if ( in == 0 || out == 0 ) /* one side has no counted connection at all */
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( in * out ) / ( double ) ( innum * outnum ) ) * 100 ); /* both sides combined: product of found counts over product of totals, as a percentage */
+ }
+
+ if ( pos == 0 ) /* at index 0 only the outgoing ratio is used */
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ if ( pos == num5 - 1 ) /* at index num5 - 1 only the incoming ratio is used */
+ {
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ }
+ else if ( innum > 0 ) /* only the incoming total is positive */
+ {
+ if ( pos == num5 - 1 && in == 1 && innum > 5 ) /* a single counted connection against a total above 5 scores 0 */
+ {
+ return 0;
+ }
+
+ return ( int ) ( ( ( double ) ( in ) / ( double ) ( innum ) ) * 100 );
+ }
+ else if ( outnum > 0 ) /* only the outgoing total is positive */
+ {
+ return ( int ) ( ( ( double ) ( out ) / ( double ) ( outnum ) ) * 100 );
+ }
+
+ return 0; /* both totals are zero, or no case above returned */
+ }
+
+ return 0; /* not reached: both branches above return on every path */
}
/*************************************************
@@ -4216,535 +4588,597 @@ Output:
Return:
None.
*************************************************/
-void scaffolding ( unsigned int len_cut, char * outfile )
+void scaffolding ( unsigned int len_cut, char *outfile )
{
- unsigned int prev_ctg, ctg, bal_ctg, *length_array, count = 0, num_lctg = 0, *score_array;
- unsigned int i, max_steps = 5;
- int num5, num3, j, len, flag, num_route, gap_c = 0;
- int tempCounter;
- short gap = 0;
- long long sum = 0, N50, N90;
- FILE * fp, *fo = NULL;
- char name[256];
- CONNECT * cnt, *prevCNT, *nextCnt, *dh_cnt;
- boolean excep, weak;
- weakCounter = 0;
- so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
- found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
-
- for ( j = 0; j < max_n_routes; j++ )
- { found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) ); }
-
- length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
-
- //use length_array to change info in index_array
- for ( i = 1; i <= num_ctg; i++ )
- { length_array[i] = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( index_array[i] > 0 )
- { length_array[index_array[i]] = i; }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- { index_array[i] = length_array[i]; }
-
- orig2new = 0;
- sprintf ( name, "%s.scaf", outfile );
- fp = ckopen ( name, "w" );
- sprintf ( name, "%s.scaf_gap", outfile );
- fo = ckopen ( name, "w" );
- scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
- gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
- tempArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].length + ( unsigned int ) overlaplen >= len_cut )
- { num_lctg++; }
- else
- { continue; }
-
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
- { continue; }
-
- num5 = num3 = 0;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[bal_ctg].flag = 1;
- len = contig_array[i].length;
- prevCNT = NULL;
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
-
- if ( excep && prevCNT )
- { fprintf ( stderr, "scaffolding: exception --- prev cnt from %u\n", prevCNT->contigID ); }
-
- if ( nextCnt && nextCnt->used )
- { break; }
-
- setConnectUsed ( ctg, cnt->contigID, 1 );
- * ( int * ) darrayPut ( gap5, num5 - 1 ) = cnt->gapLen;
- ctg = cnt->contigID;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- len += cnt->gapLen + contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- ctg = getTwinCtg ( i );
-
- if ( num5 >= 2 )
- { prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg ); }
- else
- { prevCNT = NULL; }
-
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
-
- if ( excep && prevCNT )
- { fprintf ( stderr, "scaffolding: exception -- prev cnt from %u\n", prevCNT->contigID ); }
-
- if ( nextCnt && nextCnt->used )
- { break; }
-
- setConnectUsed ( ctg, cnt->contigID, 1 );
- ctg = cnt->contigID;
- len += cnt->gapLen + contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( int * ) darrayPut ( gap3, num3 ) = cnt->gapLen;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- if ( num5 + num3 == 1 )
- {
- contig_array[i].flag = 0;
- continue;
- }
-
- len += overlaplen;
- sum += len;
- length_array[count++] = len;
-
- if ( num5 + num3 < 1 )
- {
- fprintf ( stderr, "no scaffold created for contig %d\n", i );
- continue;
- }
-
- tempCounter = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- { * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) = * ( unsigned int * ) darrayGet ( scaf3, j ); }
-
- for ( j = 0; j < num5; j++ )
- { * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) = * ( unsigned int * ) darrayGet ( scaf5, j ); }
-
- score_array = ( unsigned int * ) ckalloc ( tempCounter * sizeof ( unsigned int ) );
- int now_cnt_weight, curr_ctg_score, pre_score = -1, prev_id = 0, mask_num = 0, score_count = 0, prev_p = 0;
-
- for ( j = 0; j < tempCounter; j++ )
- {
- int currId = * ( unsigned int * ) darrayGet ( tempArray, j );
- dh_cnt = getCntBetween ( prev_id, currId );
-
- if ( dh_cnt )
- { now_cnt_weight = dh_cnt ->weight; }
- else
- { now_cnt_weight = 0; }
-
- curr_ctg_score = get_ctg_score2 ( j, tempCounter );
-
- if ( prev_id == 0 )
- {
- pre_score = curr_ctg_score;
- prev_id = currId;
- prev_p = j;
- continue;
- }
-
- if ( score_mask )
- {
- if ( now_cnt_weight == 0 && j > 0 && j < tempCounter - 1 )
- {
- if ( pre_score == 0 )
- {
- * ( unsigned int * ) darrayPut ( tempArray, prev_p ) = 0;
- contig_array[prev_id].flag = 0;
- contig_array[getTwinCtg ( prev_id )].flag = 0;
- mask_num++;
- }
- else if ( curr_ctg_score == 0 )
- {
- * ( unsigned int * ) darrayPut ( tempArray, j ) = 0;
- contig_array[currId].flag = 0;
- contig_array[getTwinCtg ( currId )].flag = 0;
- mask_num++;
-
- if ( j < tempCounter - 1 )
- { continue; }
- }
- }
-
- if ( abs ( prev_id - currId ) <= 2 && now_cnt_weight == 0
- && * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 && * ( unsigned int * ) darrayGet ( tempArray, j ) != 0 )
- {
- mask_num++;
-
- if ( contig_array[prev_id].cvg < contig_array[currId].cvg )
- {
- * ( unsigned int * ) darrayPut ( tempArray, prev_p ) = 0;
- contig_array[prev_id].flag = 0;
- contig_array[getTwinCtg ( prev_id )].flag = 0;
- }
- else
- {
- * ( unsigned int * ) darrayPut ( tempArray, j ) = 0;
- contig_array[currId].flag = 0;
- contig_array[getTwinCtg ( currId )].flag = 0;
-
- if ( j < tempCounter - 1 )
- { continue; }
- }
- }
- }
-
- if ( * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 )
- { score_array[score_count++] = pre_score; }
-
- pre_score = curr_ctg_score;
- prev_id = currId;
- prev_p = j;
- }
-
- if ( * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 )
- { score_array[score_count++] = pre_score; }
-
- if ( score_mask == 1 && ( ( num3 + num5 > 5 && score_count < 2 ) || score_count == 1 ) )
- {
- free ( ( void * ) score_array );
- --count;
- sum -= len;
-
- for ( j = 0; j < num3; j++ )
- {
- ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
- contig_array[ctg].flag = 0;
- contig_array[getTwinCtg ( ctg )].flag = 0;
- }
-
- for ( j = 0; j < num5; j++ )
- {
- ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
- contig_array[ctg].flag = 0;
- contig_array[getTwinCtg ( ctg )].flag = 0;
- }
-
- continue;
- }
-
- fprintf ( fp, ">scaffold%d %d %d %d\n", count, score_count, len, num3 + num5, mask_num );
- fprintf ( fo, ">scaffold%d %d %d %d\n", count, score_count, len, num3 + num5, mask_num );
- len = prev_ctg = 0;
- tempCounter = 0, score_count = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- {
- int now_cnt_weigth = 0;
- int nextid, start = 0;
- int currId = * ( unsigned int * ) darrayGet ( scaf3, j );
- int tmpid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter++ );
-
- if ( tmpid == 0 )
- {
- if ( j == num3 - 1 )
- {
- len = 0;
- continue;
- }
-
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- gap += tmpgap > 0 ? tmpgap : 0;
- continue;
- }
-
- if ( j > 0 )
- {
- nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
-
- while ( nextid == 0 && tempCounter + start + 1 < num3 + num5 )
- {
- start++;
- nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
- }
- }
- else
- {
- nextid = i;
- }
-
- CONNECT * dh_cnt = getCntBetween ( currId, nextid );
-
- if ( dh_cnt )
- { now_cnt_weigth = dh_cnt->weight; }
- else
- { now_cnt_weigth = 0; }
-
- curr_ctg_score = score_array[score_count++];
-
- if ( score_mask == 1 && curr_ctg_score == 0 && ( num3 + num5 > 2 )
- && ( ( j == num3 - 1 && contig_array[nextid].length < 200 && num3 + num5 > 5 ) || ( now_cnt_weigth == 0 && j > 0 ) ) )
- {
- if ( j == num3 - 1 )
- {
- len = 0;
- continue;
- }
-
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- gap += tmpgap > 0 ? tmpgap : 0;
- continue;
- }
-
- if ( !isLargerThanTwin ( * ( unsigned int * ) darrayGet ( scaf3, j ) ) )
- {
- fprintf ( fp, "%-10d %-10d + %d %d %d"
- , index_array[* ( unsigned int * ) darrayGet ( scaf3, j )], len,
- contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + overlaplen,
- now_cnt_weigth, curr_ctg_score );
- weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf3, j ) );
- }
- else
- {
- fprintf ( fp, "%-10d %-10d - %d %d %d"
- , index_array[getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf3, j ) )], len
- , contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + overlaplen,
- now_cnt_weigth, curr_ctg_score );
- weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf3, j ) );
- }
-
- if ( prev_ctg )
- {
- num_route = num_trace = 0;
- traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf3, j ), prev_ctg, max_steps
- , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
-
- if ( num_route == 1 )
- {
- output1gap ( fo, max_steps );
- gap_c++;
- }
- }
-
- fprintf ( fo, "%-10d %-10d\n", * ( unsigned int * ) darrayGet ( scaf3, j ), len );
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- prev_ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
- gap = * ( int * ) darrayGet ( gap3, j ) > 0 ? * ( int * ) darrayGet ( gap3, j ) : 0;
- }
-
- for ( j = 0; j < num5; j++ )
- {
- int now_cnt_weigth = 0;
- int currId, nextid, start = 0;
- currId = * ( unsigned int * ) darrayGet ( scaf5, j );
- int tmpid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter++ );
-
- if ( tmpid == 0 )
- {
- if ( j == num5 - 1 )
- { continue; }
-
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
- int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
- gap += tmpgap > 0 ? tmpgap : 0;
- continue;
- }
-
- if ( j < num5 - 1 )
- {
- nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
-
- while ( nextid == 0 && tempCounter + start + 1 < num3 + num5 )
- {
- start ++;
- nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
- }
-
- CONNECT * dh_cnt = getCntBetween ( currId, nextid );
-
- if ( dh_cnt )
- { now_cnt_weigth = dh_cnt->weight; }
- else
- { now_cnt_weigth = 0; }
- }
-
- curr_ctg_score = score_array [score_count++];
-
- if ( score_mask == 1 && curr_ctg_score == 0 && ( num3 + num5 > 2 )
- && ( ( j == num5 - 1 && contig_array[* ( unsigned int * ) darrayGet ( scaf5, j - 1 )].length < 200 && num3 + num5 > 5 )
- || ( j != num5 - 1 && now_cnt_weigth == 0 ) ) )
- {
- if ( j == num5 - 1 )
- {
- continue;
- }
-
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
- int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
- gap += tmpgap > 0 ? tmpgap : 0;
- continue;
- }
-
- if ( !isLargerThanTwin ( * ( unsigned int * ) darrayGet ( scaf5, j ) ) )
- {
- fprintf ( fp, "%-10d %-10d + %d %d %d"
- , index_array[* ( unsigned int * ) darrayGet ( scaf5, j )], len
- , contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + overlaplen,
- now_cnt_weigth, curr_ctg_score );
- weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf5, j ) );
- }
- else
- {
- fprintf ( fp, "%-10d %-10d - %d %d %d"
- , index_array[getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, j ) )], len
- , contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + overlaplen,
- now_cnt_weigth, curr_ctg_score );
- weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf5, j ) );
- }
-
- if ( prev_ctg )
- {
- num_route = num_trace = 0;
- traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf5, j ), prev_ctg, max_steps
- , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
-
- if ( num_route == 1 )
- {
- output1gap ( fo, max_steps );
- gap_c++;
- }
- }
-
- fprintf ( fo, "%-10d %-10d\n", * ( unsigned int * ) darrayGet ( scaf5, j ), len );
-
- if ( j < num5 - 1 )
- {
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length +
- * ( int * ) darrayGet ( gap5, j );
- prev_ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
- gap = * ( int * ) darrayGet ( gap5, j ) > 0 ? * ( int * ) darrayGet ( gap5, j ) : 0;
- }
- }
-
- free ( ( void * ) score_array );
- }
-
- freeDarray ( scaf3 );
- freeDarray ( scaf5 );
- freeDarray ( gap3 );
- freeDarray ( gap5 );
- freeDarray ( tempArray );
- fclose ( fp );
- fclose ( fo );
- fprintf ( stderr, "\nThe final rank\n" );
-
- if ( count == 0 )
- {
- fprintf ( stderr, "\n\nNo scaffold was constructed.\n\n" );
- free ( ( void * ) length_array );
-
- for ( j = 0; j < max_n_routes; j++ )
- { free ( ( void * ) found_routes[j] ); }
-
- free ( ( void * ) found_routes );
- free ( ( void * ) so_far );
- return;
- }
-
- fprintf ( stderr, "\n*******************************\n" );
- fprintf ( stderr, " Scaffold number %d\n", count );
- fprintf ( stderr, " In-scaffold contig number %u\n", num_lctg / 2 );
- fprintf ( stderr, " Total scaffold length %lld\n", sum );
- fprintf ( stderr, " Average scaffold length %lld\n", sum / count );
- fprintf ( stderr, " Filled gap number %d\n", gap_c );
-
- //output singleton
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].length + ( unsigned int ) overlaplen < len_cut || contig_array[i].flag )
- { continue; }
-
- length_array[count++] = contig_array[i].length;
- sum += contig_array[i].length;
-
- if ( isSmallerThanTwin ( i ) )
- { i++; }
- }
-
- long long total_len = sum;
- qsort ( length_array, count, sizeof ( length_array[0] ), cmp_int );
- N50 = sum * 0.5;
- N90 = sum * 0.9;
- int N50length = 0;
- int N90length = 0;
- sum = flag = 0;
-
- for ( j = count - 1; j >= 0; j-- )
- {
- sum += length_array[j];
-
- if ( !flag && sum >= N50 && N50length == 0 )
- {
- N50length = length_array[j];
- flag++;
- }
-
- if ( sum >= N90 && N90length == 0 )
- {
- N90length = length_array[j];
- break;
- }
- }
-
- fprintf ( stderr, " Longest scaffold %lld\n", length_array[count - 1] );
- fprintf ( stderr, " Scaffold and singleton number %d\n", count );
- fprintf ( stderr, " Scaffold and singleton length %lld\n", total_len );
- fprintf ( stderr, " Average length %d\n", total_len / count );
- fprintf ( stderr, " N50 %d\n", N50length );
- fprintf ( stderr, " N90 %d\n", N90length );
- fprintf ( stderr, " Weak points %d\n", weakCounter );
- fprintf ( stderr, "\n*******************************\n" );
- fflush ( stdout );
- free ( ( void * ) length_array );
-
- for ( j = 0; j < max_n_routes; j++ )
- { free ( ( void * ) found_routes[j] ); }
-
- free ( ( void * ) found_routes );
- free ( ( void * ) so_far );
+ unsigned int prev_ctg, ctg, bal_ctg, *length_array, count = 0, num_lctg = 0, *score_array;
+ unsigned int i, max_steps = 5;
+ int num5, num3, j, len, flag, num_route, gap_c = 0;
+ int tempCounter;
+ short gap = 0;
+ long long sum = 0, N50, N90;
+ FILE *fp, *fo = NULL;
+ char name[256];
+ CONNECT *cnt, *prevCNT, *nextCnt, *dh_cnt;
+ boolean excep, weak;
+ weakCounter = 0;
+ so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
+ found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
+
+ for ( j = 0; j < max_n_routes; j++ )
+ {
+ found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) );
+ }
+
+ length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
+
+ //use length_array to change info in index_array
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ length_array[i] = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( index_array[i] > 0 )
+ {
+ length_array[index_array[i]] = i;
+ }
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ index_array[i] = length_array[i];
+ }
+
+ orig2new = 0;
+ sprintf ( name, "%s.scaf", outfile );
+ fp = ckopen ( name, "w" );
+ sprintf ( name, "%s.scaf_gap", outfile );
+ fo = ckopen ( name, "w" );
+ scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+ gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+ tempArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].length + ( unsigned int ) overlaplen >= len_cut )
+ {
+ num_lctg++;
+ }
+ else
+ {
+ continue;
+ }
+
+ if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
+ {
+ continue;
+ }
+
+ num5 = num3 = 0;
+ ctg = i;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+ contig_array[i].flag = 1;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[bal_ctg].flag = 1;
+ len = contig_array[i].length;
+ prevCNT = NULL;
+ cnt = getNextContig ( ctg, prevCNT, &excep );
+
+ while ( cnt )
+ {
+ nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+
+ if ( excep && prevCNT )
+ {
+ fprintf ( stderr, "scaffolding: exception --- prev cnt from %u\n", prevCNT->contigID );
+ }
+
+ if ( nextCnt && nextCnt->used )
+ {
+ break;
+ }
+
+ setConnectUsed ( ctg, cnt->contigID, 1 );
+ * ( int * ) darrayPut ( gap5, num5 - 1 ) = cnt->gapLen;
+ ctg = cnt->contigID;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+ len += cnt->gapLen + contig_array[ctg].length;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ ctg = getTwinCtg ( i );
+
+ if ( num5 >= 2 )
+ {
+ prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg );
+ }
+ else
+ {
+ prevCNT = NULL;
+ }
+
+ cnt = getNextContig ( ctg, prevCNT, &excep );
+
+ while ( cnt )
+ {
+ nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+
+ if ( excep && prevCNT )
+ {
+ fprintf ( stderr, "scaffolding: exception -- prev cnt from %u\n", prevCNT->contigID );
+ }
+
+ if ( nextCnt && nextCnt->used )
+ {
+ break;
+ }
+
+ setConnectUsed ( ctg, cnt->contigID, 1 );
+ ctg = cnt->contigID;
+ len += cnt->gapLen + contig_array[ctg].length;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ * ( int * ) darrayPut ( gap3, num3 ) = cnt->gapLen;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+ prevCNT = cnt;
+ cnt = nextCnt;
+ }
+
+ if ( num5 + num3 == 1 )
+ {
+ contig_array[i].flag = 0;
+ continue;
+ }
+
+ len += overlaplen;
+ sum += len;
+ length_array[count++] = len;
+
+ if ( num5 + num3 < 1 )
+ {
+ fprintf ( stderr, "no scaffold created for contig %d\n", i );
+ continue;
+ }
+
+ tempCounter = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) = * ( unsigned int * ) darrayGet ( scaf3, j );
+ }
+
+ for ( j = 0; j < num5; j++ )
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) = * ( unsigned int * ) darrayGet ( scaf5, j );
+ }
+
+ score_array = ( unsigned int * ) ckalloc ( tempCounter * sizeof ( unsigned int ) );
+ int now_cnt_weight, curr_ctg_score, pre_score = -1, prev_id = 0, mask_num = 0, score_count = 0, prev_p = 0;
+
+ for ( j = 0; j < tempCounter; j++ )
+ {
+ int currId = * ( unsigned int * ) darrayGet ( tempArray, j );
+ dh_cnt = getCntBetween ( prev_id, currId );
+
+ if ( dh_cnt )
+ {
+ now_cnt_weight = dh_cnt ->weight;
+ }
+ else
+ {
+ now_cnt_weight = 0;
+ }
+
+ curr_ctg_score = get_ctg_score2 ( j, tempCounter );
+
+ if ( prev_id == 0 )
+ {
+ pre_score = curr_ctg_score;
+ prev_id = currId;
+ prev_p = j;
+ continue;
+ }
+
+ if ( score_mask )
+ {
+ if ( now_cnt_weight == 0 && j > 0 && j < tempCounter - 1 )
+ {
+ if ( pre_score == 0 )
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, prev_p ) = 0;
+ contig_array[prev_id].flag = 0;
+ contig_array[getTwinCtg ( prev_id )].flag = 0;
+ mask_num++;
+ }
+ else if ( curr_ctg_score == 0 )
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, j ) = 0;
+ contig_array[currId].flag = 0;
+ contig_array[getTwinCtg ( currId )].flag = 0;
+ mask_num++;
+
+ if ( j < tempCounter - 1 )
+ {
+ continue;
+ }
+ }
+ }
+
+ if ( abs ( prev_id - currId ) <= 2 && now_cnt_weight == 0
+ && * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 && * ( unsigned int * ) darrayGet ( tempArray, j ) != 0 )
+ {
+ mask_num++;
+
+ if ( contig_array[prev_id].cvg < contig_array[currId].cvg )
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, prev_p ) = 0;
+ contig_array[prev_id].flag = 0;
+ contig_array[getTwinCtg ( prev_id )].flag = 0;
+ }
+ else
+ {
+ * ( unsigned int * ) darrayPut ( tempArray, j ) = 0;
+ contig_array[currId].flag = 0;
+ contig_array[getTwinCtg ( currId )].flag = 0;
+
+ if ( j < tempCounter - 1 )
+ {
+ continue;
+ }
+ }
+ }
+ }
+
+ if ( * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 )
+ {
+ score_array[score_count++] = pre_score;
+ }
+
+ pre_score = curr_ctg_score;
+ prev_id = currId;
+ prev_p = j;
+ }
+
+ if ( * ( unsigned int * ) darrayGet ( tempArray, prev_p ) != 0 )
+ {
+ score_array[score_count++] = pre_score;
+ }
+
+ if ( score_mask == 1 && ( ( num3 + num5 > 5 && score_count < 2 ) || score_count == 1 ) )
+ {
+ free ( ( void * ) score_array );
+ --count;
+ sum -= len;
+
+ for ( j = 0; j < num3; j++ )
+ {
+ ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
+ contig_array[ctg].flag = 0;
+ contig_array[getTwinCtg ( ctg )].flag = 0;
+ }
+
+ for ( j = 0; j < num5; j++ )
+ {
+ ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
+ contig_array[ctg].flag = 0;
+ contig_array[getTwinCtg ( ctg )].flag = 0;
+ }
+
+ continue;
+ }
+
+ fprintf ( fp, ">scaffold%d %d %d %d\n", count, score_count, len, num3 + num5, mask_num );
+ fprintf ( fo, ">scaffold%d %d %d %d\n", count, score_count, len, num3 + num5, mask_num );
+ len = prev_ctg = 0;
+ tempCounter = 0, score_count = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ {
+ int now_cnt_weigth = 0;
+ int nextid, start = 0;
+ int currId = * ( unsigned int * ) darrayGet ( scaf3, j );
+ int tmpid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter++ );
+
+ if ( tmpid == 0 )
+ {
+ if ( j == num3 - 1 )
+ {
+ len = 0;
+ continue;
+ }
+
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+ int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+ gap += tmpgap > 0 ? tmpgap : 0;
+ continue;
+ }
+
+ if ( j > 0 )
+ {
+ nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
+
+ while ( nextid == 0 && tempCounter + start + 1 < num3 + num5 )
+ {
+ start++;
+ nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
+ }
+ }
+ else
+ {
+ nextid = i;
+ }
+
+ CONNECT *dh_cnt = getCntBetween ( currId, nextid );
+
+ if ( dh_cnt )
+ {
+ now_cnt_weigth = dh_cnt->weight;
+ }
+ else
+ {
+ now_cnt_weigth = 0;
+ }
+
+ curr_ctg_score = score_array[score_count++];
+
+ if ( score_mask == 1 && curr_ctg_score == 0 && ( num3 + num5 > 2 )
+ && ( ( j == num3 - 1 && contig_array[nextid].length < 200 && num3 + num5 > 5 ) || ( now_cnt_weigth == 0 && j > 0 ) ) )
+ {
+ if ( j == num3 - 1 )
+ {
+ len = 0;
+ continue;
+ }
+
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+ int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+ gap += tmpgap > 0 ? tmpgap : 0;
+ continue;
+ }
+
+ if ( !isLargerThanTwin ( * ( unsigned int * ) darrayGet ( scaf3, j ) ) )
+ {
+ fprintf ( fp, "%-10d %-10d + %d %d %d"
+ , index_array[* ( unsigned int * ) darrayGet ( scaf3, j )], len,
+ contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + overlaplen,
+ now_cnt_weigth, curr_ctg_score );
+ weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf3, j ) );
+ }
+ else
+ {
+ fprintf ( fp, "%-10d %-10d - %d %d %d"
+ , index_array[getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf3, j ) )], len
+ , contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + overlaplen,
+ now_cnt_weigth, curr_ctg_score );
+ weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf3, j ) );
+ }
+
+ if ( prev_ctg )
+ {
+ num_route = num_trace = 0;
+ traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf3, j ), prev_ctg, max_steps
+ , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
+
+ if ( num_route == 1 )
+ {
+ output1gap ( fo, max_steps );
+ gap_c++;
+ }
+ }
+
+ fprintf ( fo, "%-10d %-10d\n", * ( unsigned int * ) darrayGet ( scaf3, j ), len );
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+ prev_ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
+ gap = * ( int * ) darrayGet ( gap3, j ) > 0 ? * ( int * ) darrayGet ( gap3, j ) : 0;
+ }
+
+ for ( j = 0; j < num5; j++ )
+ {
+ int now_cnt_weigth = 0;
+ int currId, nextid, start = 0;
+ currId = * ( unsigned int * ) darrayGet ( scaf5, j );
+ int tmpid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter++ );
+
+ if ( tmpid == 0 )
+ {
+ if ( j == num5 - 1 )
+ {
+ continue;
+ }
+
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
+ int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
+ gap += tmpgap > 0 ? tmpgap : 0;
+ continue;
+ }
+
+ if ( j < num5 - 1 )
+ {
+ nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
+
+ while ( nextid == 0 && tempCounter + start + 1 < num3 + num5 )
+ {
+ start ++;
+ nextid = * ( unsigned int * ) darrayGet ( tempArray, tempCounter + start );
+ }
+
+ CONNECT *dh_cnt = getCntBetween ( currId, nextid );
+
+ if ( dh_cnt )
+ {
+ now_cnt_weigth = dh_cnt->weight;
+ }
+ else
+ {
+ now_cnt_weigth = 0;
+ }
+ }
+
+ curr_ctg_score = score_array [score_count++];
+
+ if ( score_mask == 1 && curr_ctg_score == 0 && ( num3 + num5 > 2 )
+ && ( ( j == num5 - 1 && contig_array[* ( unsigned int * ) darrayGet ( scaf5, j - 1 )].length < 200 && num3 + num5 > 5 )
+ || ( j != num5 - 1 && now_cnt_weigth == 0 ) ) )
+ {
+ if ( j == num5 - 1 )
+ {
+ continue;
+ }
+
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
+ int tmpgap = contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + * ( int * ) darrayGet ( gap5, j );
+ gap += tmpgap > 0 ? tmpgap : 0;
+ continue;
+ }
+
+ if ( !isLargerThanTwin ( * ( unsigned int * ) darrayGet ( scaf5, j ) ) )
+ {
+ fprintf ( fp, "%-10d %-10d + %d %d %d"
+ , index_array[* ( unsigned int * ) darrayGet ( scaf5, j )], len
+ , contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + overlaplen,
+ now_cnt_weigth, curr_ctg_score );
+ weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf5, j ) );
+ }
+ else
+ {
+ fprintf ( fp, "%-10d %-10d - %d %d %d"
+ , index_array[getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, j ) )], len
+ , contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length + overlaplen,
+ now_cnt_weigth, curr_ctg_score );
+ weak = printCnts ( fp, * ( unsigned int * ) darrayGet ( scaf5, j ) );
+ }
+
+ if ( prev_ctg )
+ {
+ num_route = num_trace = 0;
+ traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf5, j ), prev_ctg, max_steps
+ , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
+
+ if ( num_route == 1 )
+ {
+ output1gap ( fo, max_steps );
+ gap_c++;
+ }
+ }
+
+ fprintf ( fo, "%-10d %-10d\n", * ( unsigned int * ) darrayGet ( scaf5, j ), len );
+
+ if ( j < num5 - 1 )
+ {
+ len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length +
+ * ( int * ) darrayGet ( gap5, j );
+ prev_ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
+ gap = * ( int * ) darrayGet ( gap5, j ) > 0 ? * ( int * ) darrayGet ( gap5, j ) : 0;
+ }
+ }
+
+ free ( ( void * ) score_array );
+ }
+
+ freeDarray ( scaf3 );
+ freeDarray ( scaf5 );
+ freeDarray ( gap3 );
+ freeDarray ( gap5 );
+ freeDarray ( tempArray );
+ fclose ( fp );
+ fclose ( fo );
+ fprintf ( stderr, "\nThe final rank\n" );
+
+ if ( count == 0 )
+ {
+ fprintf ( stderr, "\n\nNo scaffold was constructed.\n\n" );
+ free ( ( void * ) length_array );
+
+ for ( j = 0; j < max_n_routes; j++ )
+ {
+ free ( ( void * ) found_routes[j] );
+ }
+
+ free ( ( void * ) found_routes );
+ free ( ( void * ) so_far );
+ return;
+ }
+
+ fprintf ( stderr, "\n*******************************\n" );
+ fprintf ( stderr, " Scaffold number %d\n", count );
+ fprintf ( stderr, " In-scaffold contig number %u\n", num_lctg / 2 );
+ fprintf ( stderr, " Total scaffold length %lld\n", sum );
+ fprintf ( stderr, " Average scaffold length %lld\n", sum / count );
+ fprintf ( stderr, " Filled gap number %d\n", gap_c );
+
+ //output singleton
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].length + ( unsigned int ) overlaplen < len_cut || contig_array[i].flag )
+ {
+ continue;
+ }
+
+ length_array[count++] = contig_array[i].length;
+ sum += contig_array[i].length;
+
+ if ( isSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ long long total_len = sum;
+ qsort ( length_array, count, sizeof ( length_array[0] ), cmp_int );
+ N50 = sum * 0.5;
+ N90 = sum * 0.9;
+ int N50length = 0;
+ int N90length = 0;
+ sum = flag = 0;
+
+ for ( j = count - 1; j >= 0; j-- )
+ {
+ sum += length_array[j];
+
+ if ( !flag && sum >= N50 && N50length == 0 )
+ {
+ N50length = length_array[j];
+ flag++;
+ }
+
+ if ( sum >= N90 && N90length == 0 )
+ {
+ N90length = length_array[j];
+ break;
+ }
+ }
+
+ fprintf ( stderr, " Longest scaffold %lld\n", length_array[count - 1] );
+ fprintf ( stderr, " Scaffold and singleton number %d\n", count );
+ fprintf ( stderr, " Scaffold and singleton length %lld\n", total_len );
+ fprintf ( stderr, " Average length %d\n", total_len / count );
+ fprintf ( stderr, " N50 %d\n", N50length );
+ fprintf ( stderr, " N90 %d\n", N90length );
+ fprintf ( stderr, " Weak points %d\n", weakCounter );
+ fprintf ( stderr, "\n*******************************\n" );
+ fflush ( stdout );
+ free ( ( void * ) length_array );
+
+ for ( j = 0; j < max_n_routes; j++ )
+ {
+ free ( ( void * ) found_routes[j] );
+ }
+
+ free ( ( void * ) found_routes );
+ free ( ( void * ) so_far );
}
/*************************************************
@@ -4762,250 +5196,286 @@ Return:
*************************************************/
void scaffold_count ( int rank, unsigned int len_cut )
{
- static DARRAY * scaf3, *scaf5;
- static DARRAY * gap3, *gap5;
- unsigned int prev_ctg, ctg, bal_ctg, *length_array, count = 0, num_lctg = 0;
- unsigned int i, max_steps = 5;
- int num5, num3, j, len, flag, num_route, gap_c = 0;
- short gap = 0;
- long long sum = 0, N50, N90;
- CONNECT * cnt, *prevCNT, *nextCnt;
- boolean excep;
- so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
- found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
-
- for ( j = 0; j < max_n_routes; j++ )
- { found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) ); }
-
- length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
-
- //use length_array to change info in index_array
- for ( i = 1; i <= num_ctg; i++ )
- { length_array[i] = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( index_array[i] > 0 )
- { length_array[index_array[i]] = i; }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- { index_array[i] = length_array[i]; } //contig i with original index: index_array[i]
-
- orig2new = 0;
- scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
- gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].length + ( unsigned int ) overlaplen >= len_cut )
- { num_lctg++; }
- else
- { continue; }
-
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
- { continue; }
-
- num5 = num3 = 0;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[bal_ctg].flag = 1;
- len = contig_array[i].length;
- prevCNT = NULL;
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
-
- if ( excep && prevCNT )
- { fprintf ( stderr, "scaffolding: exception --- prev cnt from %u\n", prevCNT->contigID ); }
-
- if ( nextCnt && nextCnt->used )
- { break; }
-
- setConnectUsed ( ctg, cnt->contigID, 1 );
- * ( int * ) darrayPut ( gap5, num5 - 1 ) = cnt->gapLen;
- ctg = cnt->contigID;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- len += cnt->gapLen + contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- ctg = getTwinCtg ( i );
-
- if ( num5 >= 2 )
- { prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg ); }
- else
- { prevCNT = NULL; }
-
- cnt = getNextContig ( ctg, prevCNT, &excep );
-
- while ( cnt )
- {
- nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
-
- if ( excep && prevCNT )
- { fprintf ( stderr, "scaffolding: exception -- prev cnt from %u\n", prevCNT->contigID ); }
-
- if ( nextCnt && nextCnt->used )
- { break; }
-
- setConnectUsed ( ctg, cnt->contigID, 1 );
- ctg = cnt->contigID;
- len += cnt->gapLen + contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( int * ) darrayPut ( gap3, num3 ) = cnt->gapLen;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- prevCNT = cnt;
- cnt = nextCnt;
- }
-
- len += overlaplen;
- sum += len;
- length_array[count++] = len;
-
- if ( num5 + num3 < 1 )
- {
- fprintf ( stderr, "no scaffold created for contig %d\n", i );
- continue;
- }
-
- len = prev_ctg = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- {
- if ( prev_ctg )
- {
- num_route = num_trace = 0;
- traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf3, j ), prev_ctg, max_steps
- , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
-
- if ( num_route == 1 )
- {
- gap_c++;
- }
- }
-
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
- prev_ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
- gap = * ( int * ) darrayGet ( gap3, j ) > 0 ? * ( int * ) darrayGet ( gap3, j ) : 0;
- }
-
- for ( j = 0; j < num5; j++ )
- {
- if ( prev_ctg )
- {
- num_route = num_trace = 0;
- traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf5, j ), prev_ctg, max_steps
- , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
-
- if ( num_route == 1 )
- {
- gap_c++;
- }
- }
-
- if ( j < num5 - 1 )
- {
- len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length +
- * ( int * ) darrayGet ( gap5, j );
- prev_ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
- gap = * ( int * ) darrayGet ( gap5, j ) > 0 ? * ( int * ) darrayGet ( gap5, j ) : 0;
- }
- }
- }
-
- freeDarray ( scaf3 );
- freeDarray ( scaf5 );
- freeDarray ( gap3 );
- freeDarray ( gap5 );
-
- if ( count == 0 )
- {
- fprintf ( stderr, "\n\nNo scaffold was constructed.\n\n" );
- free ( ( void * ) length_array );
-
- for ( j = 0; j < max_n_routes; j++ )
- { free ( ( void * ) found_routes[j] ); }
-
- free ( ( void * ) found_routes );
- free ( ( void * ) so_far );
- return;
- }
-
- fprintf ( stderr, "\nRank %d\n", rank );
- fprintf ( stderr, " Scaffold number %d\n", count );
- fprintf ( stderr, " In-scaffold contig number %u\n", num_lctg / 2 );
- fprintf ( stderr, " Total scaffold length %lld\n", sum );
- fprintf ( stderr, " Average scaffold length %lld\n", sum / count );
- fprintf ( stderr, " Filled gap number %d\n", gap_c );
-
- //output singleton
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].length + ( unsigned int ) overlaplen < len_cut || contig_array[i].flag )
- { continue; }
-
- length_array[count++] = contig_array[i].length;
- sum += contig_array[i].length;
-
- if ( isSmallerThanTwin ( i ) )
- { i++; }
- }
-
- long int total_len = sum;
- // calculate N50/N90
- qsort ( length_array, count, sizeof ( length_array[0] ), cmp_int );
- N50 = sum * 0.5;
- N90 = sum * 0.9;
- int N50length = 0;
- int N90length = 0;
- sum = flag = 0;
-
- for ( j = count - 1; j >= 0; j-- )
- {
- sum += length_array[j];
-
- if ( !flag && sum >= N50 && N50length == 0 )
- {
- N50length = length_array[j];
- flag++;
- }
-
- if ( sum >= N90 && N90length == 0 )
- {
- N90length = length_array[j];
- break;
- }
- }
-
- fprintf ( stderr, " Longest scaffold %lld\n", length_array[count - 1] );
- fprintf ( stderr, " Scaffold and singleton number %d\n", count );
- fprintf ( stderr, " Scaffold and singleton length %lld\n", total_len );
- fprintf ( stderr, " Average length %d\n", total_len / count );
- fprintf ( stderr, " N50 %d\n", N50length );
- fprintf ( stderr, " N90 %d\n", N90length );
- free ( ( void * ) length_array );
-
- for ( j = 0; j < max_n_routes; j++ )
- { free ( ( void * ) found_routes[j] ); }
-
- free ( ( void * ) found_routes );
- free ( ( void * ) so_far );
+    /*
+     * Walk the connection graph from every sufficiently long, unvisited
+     * contig, chain contigs into scaffolds in both directions, and print
+     * summary statistics (count, total/average length, N50, N90) for this
+     * rank to stderr. Nothing is written to disk here.
+     *
+     * rank    - value reported in the "Rank" header line.
+     * len_cut - contigs with length + overlaplen below this are ignored.
+     */
+    static DARRAY *scaf3, *scaf5;
+    static DARRAY *gap3, *gap5;
+    unsigned int prev_ctg, ctg, bal_ctg, *length_array, count = 0, num_lctg = 0;
+    unsigned int i, max_steps = 5;
+    int num5, num3, j, len, flag, num_route, gap_c = 0;
+    /* NOTE(review): gap is a short but is assigned from int gap lengths below;
+       values above SHRT_MAX would not fit -- confirm the expected range. */
+    short gap = 0;
+    long long sum = 0, N50, N90;
+    CONNECT *cnt, *prevCNT, *nextCnt;
+    boolean excep;
+
+    /* Scratch buffers in file-level variables; presumably consumed by traceAlongArc. */
+    so_far = ( unsigned int * ) ckalloc ( max_n_routes * sizeof ( unsigned int ) );
+    found_routes = ( unsigned int ** ) ckalloc ( max_n_routes * sizeof ( unsigned int * ) );
+
+    for ( j = 0; j < max_n_routes; j++ )
+    {
+        found_routes[j] = ( unsigned int * ) ckalloc ( max_steps * sizeof ( unsigned int ) );
+    }
+
+    length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
+
+    /* Invert the mapping held in index_array, using length_array as scratch. */
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        length_array[i] = 0;
+    }
+
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        if ( index_array[i] > 0 )
+        {
+            length_array[index_array[i]] = i;
+        }
+    }
+
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        index_array[i] = length_array[i];   /* contig i with original index: index_array[i] */
+    }
+
+    orig2new = 0;
+    scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+    scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+    gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+    gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+
+    /* flag marks contigs that have already been placed in a scaffold. */
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        contig_array[i].flag = 0;
+    }
+
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        if ( contig_array[i].length + ( unsigned int ) overlaplen < len_cut )
+        {
+            continue;
+        }
+
+        num_lctg++;
+
+        if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect || !validConnect ( i, NULL ) )
+        {
+            continue;
+        }
+
+        /* Seed a new scaffold with contig i, then extend along its connections. */
+        num5 = num3 = 0;
+        ctg = i;
+        * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+        contig_array[i].flag = 1;
+        bal_ctg = getTwinCtg ( ctg );
+        contig_array[bal_ctg].flag = 1;
+        len = contig_array[i].length;
+        prevCNT = NULL;
+        cnt = getNextContig ( ctg, prevCNT, &excep );
+
+        while ( cnt )
+        {
+            nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+
+            if ( excep && prevCNT )
+            {
+                fprintf ( stderr, "scaffolding: exception --- prev cnt from %u\n", prevCNT->contigID );
+            }
+
+            /* Stop before stepping onto a connection that is already in use. */
+            if ( nextCnt && nextCnt->used )
+            {
+                break;
+            }
+
+            setConnectUsed ( ctg, cnt->contigID, 1 );
+            * ( int * ) darrayPut ( gap5, num5 - 1 ) = cnt->gapLen;
+            ctg = cnt->contigID;
+            * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+            len += cnt->gapLen + contig_array[ctg].length;
+            bal_ctg = getTwinCtg ( ctg );
+            contig_array[ctg].flag = 1;
+            contig_array[bal_ctg].flag = 1;
+            prevCNT = cnt;
+            cnt = nextCnt;
+        }
+
+        /* Extend in the other direction, starting from the twin of contig i. */
+        ctg = getTwinCtg ( i );
+
+        if ( num5 >= 2 )
+        {
+            prevCNT = checkConnect ( getTwinCtg ( * ( unsigned int * ) darrayGet ( scaf5, 1 ) ), ctg );
+        }
+        else
+        {
+            prevCNT = NULL;
+        }
+
+        cnt = getNextContig ( ctg, prevCNT, &excep );
+
+        while ( cnt )
+        {
+            nextCnt = getNextContig ( cnt->contigID, cnt, &excep );
+
+            if ( excep && prevCNT )
+            {
+                fprintf ( stderr, "scaffolding: exception -- prev cnt from %u\n", prevCNT->contigID );
+            }
+
+            if ( nextCnt && nextCnt->used )
+            {
+                break;
+            }
+
+            setConnectUsed ( ctg, cnt->contigID, 1 );
+            ctg = cnt->contigID;
+            len += cnt->gapLen + contig_array[ctg].length;
+            bal_ctg = getTwinCtg ( ctg );
+            contig_array[ctg].flag = 1;
+            contig_array[bal_ctg].flag = 1;
+            /* This side stores the twin ids, so scaf3 can be read back in reverse. */
+            * ( int * ) darrayPut ( gap3, num3 ) = cnt->gapLen;
+            * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+            prevCNT = cnt;
+            cnt = nextCnt;
+        }
+
+        len += overlaplen;
+        sum += len;
+        length_array[count++] = len;
+
+        if ( num5 + num3 < 1 )
+        {
+            /* i is unsigned, so it is printed with %u. */
+            fprintf ( stderr, "no scaffold created for contig %u\n", i );
+            continue;
+        }
+
+        /*
+         * Second pass over the scaffold: count the gaps for which traceAlongArc
+         * reports exactly one route. len is still accumulated here but is not
+         * read again before being reset for the next seed contig.
+         */
+        len = prev_ctg = 0;
+
+        for ( j = num3 - 1; j >= 0; j-- )
+        {
+            if ( prev_ctg )
+            {
+                num_route = num_trace = 0;
+                traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf3, j ), prev_ctg, max_steps
+                                , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
+
+                if ( num_route == 1 )
+                {
+                    gap_c++;
+                }
+            }
+
+            len += contig_array[* ( unsigned int * ) darrayGet ( scaf3, j )].length + * ( int * ) darrayGet ( gap3, j );
+            prev_ctg = * ( unsigned int * ) darrayGet ( scaf3, j );
+            gap = * ( int * ) darrayGet ( gap3, j ) > 0 ? * ( int * ) darrayGet ( gap3, j ) : 0;
+        }
+
+        for ( j = 0; j < num5; j++ )
+        {
+            if ( prev_ctg )
+            {
+                num_route = num_trace = 0;
+                traceAlongArc ( * ( unsigned int * ) darrayGet ( scaf5, j ), prev_ctg, max_steps
+                                , gap - ins_size_var, gap + ins_size_var, 0, 0, &num_route );
+
+                if ( num_route == 1 )
+                {
+                    gap_c++;
+                }
+            }
+
+            /* gap5 has one entry fewer than scaf5: no gap follows the last contig. */
+            if ( j < num5 - 1 )
+            {
+                len += contig_array[* ( unsigned int * ) darrayGet ( scaf5, j )].length +
+                       * ( int * ) darrayGet ( gap5, j );
+                prev_ctg = * ( unsigned int * ) darrayGet ( scaf5, j );
+                gap = * ( int * ) darrayGet ( gap5, j ) > 0 ? * ( int * ) darrayGet ( gap5, j ) : 0;
+            }
+        }
+    }
+
+    freeDarray ( scaf3 );
+    freeDarray ( scaf5 );
+    freeDarray ( gap3 );
+    freeDarray ( gap5 );
+
+    if ( count == 0 )
+    {
+        fprintf ( stderr, "\n\nNo scaffold was constructed.\n\n" );
+        free ( ( void * ) length_array );
+
+        for ( j = 0; j < max_n_routes; j++ )
+        {
+            free ( ( void * ) found_routes[j] );
+        }
+
+        free ( ( void * ) found_routes );
+        free ( ( void * ) so_far );
+        return;
+    }
+
+    /* Conversion specifiers below match the argument types (count is unsigned). */
+    fprintf ( stderr, "\nRank %d\n", rank );
+    fprintf ( stderr, " Scaffold number %u\n", count );
+    fprintf ( stderr, " In-scaffold contig number %u\n", num_lctg / 2 );
+    fprintf ( stderr, " Total scaffold length %lld\n", sum );
+    fprintf ( stderr, " Average scaffold length %lld\n", sum / count );
+    fprintf ( stderr, " Filled gap number %d\n", gap_c );
+
+    /* Add the long contigs that ended up in no scaffold (singletons). */
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        if ( contig_array[i].length + ( unsigned int ) overlaplen < len_cut || contig_array[i].flag )
+        {
+            continue;
+        }
+
+        length_array[count++] = contig_array[i].length;
+        sum += contig_array[i].length;
+
+        /* Skip the next index as well; presumably the twin is stored right after i. */
+        if ( isSmallerThanTwin ( i ) )
+        {
+            i++;
+        }
+    }
+
+    /* long long, so the full value of sum is kept and matches the %lld below. */
+    long long total_len = sum;
+    /* calculate N50/N90 */
+    qsort ( length_array, count, sizeof ( length_array[0] ), cmp_int );
+    N50 = sum * 0.5;
+    N90 = sum * 0.9;
+    int N50length = 0;
+    int N90length = 0;
+    sum = flag = 0;
+
+    /* Accumulate lengths from the end of the sorted array until each threshold is reached. */
+    for ( j = count - 1; j >= 0; j-- )
+    {
+        sum += length_array[j];
+
+        if ( !flag && sum >= N50 && N50length == 0 )
+        {
+            N50length = length_array[j];
+            flag++;
+        }
+
+        if ( sum >= N90 && N90length == 0 )
+        {
+            N90length = length_array[j];
+            break;
+        }
+    }
+
+    /* length_array holds unsigned int, so it is printed with %u rather than %lld. */
+    fprintf ( stderr, " Longest scaffold %u\n", length_array[count - 1] );
+    fprintf ( stderr, " Scaffold and singleton number %u\n", count );
+    fprintf ( stderr, " Scaffold and singleton length %lld\n", total_len );
+    fprintf ( stderr, " Average length %lld\n", total_len / count );
+    fprintf ( stderr, " N50 %d\n", N50length );
+    fprintf ( stderr, " N90 %d\n", N90length );
+    free ( ( void * ) length_array );
+
+    for ( j = 0; j < max_n_routes; j++ )
+    {
+        free ( ( void * ) found_routes[j] );
+    }
+
+    free ( ( void * ) found_routes );
+    free ( ( void * ) so_far );
}
@@ -5022,36 +5492,38 @@ Output:
Return:
None.
*************************************************/
-static void outputLinks ( FILE * fp, int insertS )
+static void outputLinks ( FILE *fp, int insertS )
{
- unsigned int i, bal_ctg, bal_toCtg;
- CONNECT * cnts, *temp_cnt;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- cnts = contig_array[i].downwardConnect;
- bal_ctg = getTwinCtg ( i );
-
- while ( cnts )
- {
- if ( cnts->weight < 1 )
- {
- cnts = cnts->next;
- continue;
- }
-
- fprintf ( fp, "%-10d %-10d\t%d\t%d\t%d\n"
- , i, cnts->contigID, cnts->gapLen, cnts->weight, insertS );
- cnts->weight = 0;
- bal_toCtg = getTwinCtg ( cnts->contigID );
- temp_cnt = getCntBetween ( bal_toCtg, bal_ctg );
-
- if ( temp_cnt )
- { temp_cnt->weight = 0; }
-
- cnts = cnts->next;
- }
- }
+    /*
+     * Write every connection with weight >= 1 to fp, one record per line:
+     *     <from contig> <to contig> <gapLen> <weight> <insertS>
+     * After a record is written, its weight and the weight of the connection
+     * between the two twin contigs are set to 0, so neither passes the
+     * weight test again later in this scan.
+     *
+     * fp      - open output stream for the links file.
+     * insertS - insert size stored in the last column of every record.
+     */
+    unsigned int i, bal_ctg, bal_toCtg;
+    CONNECT *cnts, *temp_cnt;
+
+    for ( i = 1; i <= num_ctg; i++ )
+    {
+        bal_ctg = getTwinCtg ( i );
+
+        for ( cnts = contig_array[i].downwardConnect; cnts; cnts = cnts->next )
+        {
+            if ( cnts->weight < 1 )
+            {
+                continue;
+            }
+
+            /* Contig ids are unsigned (i is declared so; contigID is printed
+               with %u elsewhere in this file), hence %u instead of %d. */
+            fprintf ( fp, "%-10u %-10u\t%d\t%d\t%d\n"
+                      , i, cnts->contigID, cnts->gapLen, cnts->weight, insertS );
+            cnts->weight = 0;
+            bal_toCtg = getTwinCtg ( cnts->contigID );
+            temp_cnt = getCntBetween ( bal_toCtg, bal_ctg );
+
+            if ( temp_cnt )
+            {
+                temp_cnt->weight = 0;
+            }
+        }
+    }
}
/*************************************************
@@ -5067,107 +5539,113 @@ static void outputLinks ( FILE * fp, int insertS )
Return:
None.
*************************************************/
-void PE2Links ( char * infile )
+void PE2Links ( char *infile )
{
- char name[256], *line;
- FILE * fp1;
- FILE * linkF;
- gzFile * fp2;
- int i;
- int flag = 0;
- unsigned int j;
- sprintf ( name, "%s.links", infile );
- boolean filesOK = check_file ( name );
-
- if ( filesOK )
- {
- fprintf ( stderr, "File %s exists, skip creating the links...\n", name );
- return;
- }
-
- linkF = ckopen ( name, "w" );
-
- if ( !pes )
- { loadPEgrads ( infile ); }
-
- fprintf ( stderr, "*****************************************************\nStart to load paired-end reads information.\n\n" );
-
- if ( COMPATIBLE_MODE == 1 )
- {
- sprintf ( name, "%s.readOnContig", infile );
- fp1 = ckopen ( name, "r" );
- }
- else
- {
- sprintf ( name, "%s.readOnContig.gz", infile );
- fp2 = gzopen ( name, "r" );
- }
-
- lineLen = 1024;
- line = ( char * ) ckalloc ( lineLen * sizeof ( char ) );
-
- if ( COMPATIBLE_MODE == 1 )
- {
- fgets ( line, lineLen, fp1 );
- }
- else
- {
- gzgets ( fp2, line, lineLen );
- }
-
- line[0] = '\0';
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- createCntMemManager();
- createCntLookupTable();
- newCntCounter = 0;
-
- if ( COMPATIBLE_MODE == 1 )
- {
- flag += connectByPE_grad ( fp1, i, line );
- }
- else
- {
- flag += connectByPE_grad_gz ( fp2, i, line );
- }
-
- fprintf ( stderr, "%lld new connections.\n\n", newCntCounter / 2 );
-
- if ( !flag )
- {
- destroyConnectMem();
- deleteCntLookupTable();
-
- for ( j = 1; j <= num_ctg; j++ )
- { contig_array[j].downwardConnect = NULL; }
-
- fprintf ( stderr, "\n" );
- continue;
- }
-
- flag = 0;
- outputLinks ( linkF, pes[i].insertS );
- destroyConnectMem();
- deleteCntLookupTable();
-
- for ( j = 1; j <= num_ctg; j++ )
- { contig_array[j].downwardConnect = NULL; }
- }
-
- free ( ( void * ) line );
-
- if ( COMPATIBLE_MODE == 1 )
- {
- fclose ( fp1 );
- }
- else
- {
- gzclose ( fp2 );
- }
-
- fclose ( linkF );
- fprintf ( stderr, "All paired-end reads information loaded.\n" );
+    /*
+     * Create "<infile>.links" from the read-on-contig placements, one
+     * insert-size grade (entry of pes[]) at a time. The input is
+     * "<infile>.readOnContig" when COMPATIBLE_MODE == 1 and
+     * "<infile>.readOnContig.gz" otherwise. Does nothing if the links file
+     * is already present.
+     *
+     * infile - file name prefix shared by the input and output files.
+     */
+    char name[256], linkName[256], *line;
+    FILE *fp1 = NULL;
+    FILE *linkF;
+    /* NOTE(review): zlib's gzFile is already a handle type; the extra '*' is
+       kept because connectByPE_grad_gz's prototype is not visible here. */
+    gzFile *fp2 = NULL;
+    int i, n;
+    int flag = 0;
+    unsigned int j;
+    boolean filesOK;
+
+    /* Build both file names with bounds checking before anything is created. */
+    n = snprintf ( linkName, sizeof ( linkName ), "%s.links", infile );
+
+    if ( n < 0 || ( size_t ) n >= sizeof ( linkName ) )
+    {
+        fprintf ( stderr, "PE2Links: file name prefix is too long: %s\n", infile );
+        return;
+    }
+
+    if ( COMPATIBLE_MODE == 1 )
+    {
+        n = snprintf ( name, sizeof ( name ), "%s.readOnContig", infile );
+    }
+    else
+    {
+        n = snprintf ( name, sizeof ( name ), "%s.readOnContig.gz", infile );
+    }
+
+    if ( n < 0 || ( size_t ) n >= sizeof ( name ) )
+    {
+        fprintf ( stderr, "PE2Links: file name prefix is too long: %s\n", infile );
+        return;
+    }
+
+    filesOK = check_file ( linkName );
+
+    if ( filesOK )
+    {
+        fprintf ( stderr, "File %s exists, skip creating the links...\n", linkName );
+        return;
+    }
+
+    linkF = ckopen ( linkName, "w" );
+
+    if ( !pes )
+    {
+        loadPEgrads ( infile );
+    }
+
+    fprintf ( stderr, "*****************************************************\nStart to load paired-end reads information.\n\n" );
+
+    if ( COMPATIBLE_MODE == 1 )
+    {
+        fp1 = ckopen ( name, "r" );
+    }
+    else
+    {
+        fp2 = gzopen ( name, "r" );
+
+        if ( !fp2 )
+        {
+            /* Remove the empty links file just created; otherwise the
+               "File exists" test above would skip the work on the next run. */
+            fprintf ( stderr, "PE2Links: cannot open %s\n", name );
+            fclose ( linkF );
+            remove ( linkName );
+            return;
+        }
+    }
+
+    lineLen = 1024;
+    line = ( char * ) ckalloc ( lineLen * sizeof ( char ) );
+
+    /* Read the first line and discard it (presumably a header); the buffer
+       is emptied again right below. */
+    if ( COMPATIBLE_MODE == 1 )
+    {
+        fgets ( line, lineLen, fp1 );
+    }
+    else
+    {
+        gzgets ( fp2, line, lineLen );
+    }
+
+    line[0] = '\0';
+
+    for ( i = 0; i < gradsCounter; i++ )
+    {
+        createCntMemManager();
+        createCntLookupTable();
+        newCntCounter = 0;
+
+        if ( COMPATIBLE_MODE == 1 )
+        {
+            flag += connectByPE_grad ( fp1, i, line );
+        }
+        else
+        {
+            flag += connectByPE_grad_gz ( fp2, i, line );
+        }
+
+        fprintf ( stderr, "%lld new connections.\n\n", newCntCounter / 2 );
+
+        /* Links are written only when this grade produced something. */
+        if ( flag )
+        {
+            outputLinks ( linkF, pes[i].insertS );
+        }
+
+        /* Per-grade connection storage is discarded in either case. */
+        destroyConnectMem();
+        deleteCntLookupTable();
+
+        for ( j = 1; j <= num_ctg; j++ )
+        {
+            contig_array[j].downwardConnect = NULL;
+        }
+
+        if ( !flag )
+        {
+            fprintf ( stderr, "\n" );
+        }
+
+        flag = 0;
+    }
+
+    free ( ( void * ) line );
+
+    if ( COMPATIBLE_MODE == 1 )
+    {
+        fclose ( fp1 );
+    }
+    else
+    {
+        gzclose ( fp2 );
+    }
+
+    fclose ( linkF );
+    fprintf ( stderr, "All paired-end reads information loaded.\n" );
}
/*************************************************
@@ -5184,84 +5662,92 @@ void PE2Links ( char * infile )
Return:
loaded record number.
*************************************************/
-static int inputLinks ( FILE * fp, int insertS, char * line )
+static int inputLinks ( FILE *fp, int insertS, char *line )
{
- unsigned int ctg, bal_ctg, toCtg, bal_toCtg;
- int gap, wt, ins;
- unsigned int counter = 0, onScafCounter = 0;
- unsigned int maskCounter = 0;
- CONNECT * cnt, *bal_cnt;
-
- if ( strlen ( line ) )
- {
- sscanf ( line, "%d %d %d %d %d", &ctg, &toCtg, &gap, &wt, &ins );
-
- if ( ins != insertS )
- { return counter; }
-
- if ( 1 )
- {
- bal_ctg = getTwinCtg ( ctg );
- bal_toCtg = getTwinCtg ( toCtg );
- cnt = add1Connect ( ctg, toCtg, gap, wt, 0 );
- bal_cnt = add1Connect ( bal_toCtg, bal_ctg, gap, wt, 0 );
-
- if ( cnt && insertS > 1000 )
- {
- cnt->newIns = bal_cnt->newIns = 1;
- }
-
- counter++;
-
- if ( contig_array[ctg].mask || contig_array[toCtg].mask )
- { maskCounter++; }
-
- if ( insertS > 1000 &&
- contig_array[ctg].from_vt == contig_array[toCtg].from_vt && // on the same scaff
- contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf )
- {
- add1LongPEcov ( ctg, toCtg, wt );
- onScafCounter++;
- }
- }
- }
-
- while ( fgets ( line, lineLen, fp ) != NULL )
- {
- sscanf ( line, "%d %d %d %d %d", &ctg, &toCtg, &gap, &wt, &ins );
-
- if ( ins > insertS )
- { break; }
-
- if ( insertS > 1000 &&
- contig_array[ctg].from_vt == contig_array[toCtg].from_vt && // on the same scaff
- contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf )
- {
- add1LongPEcov ( ctg, toCtg, wt );
- onScafCounter++;
- }
-
- bal_ctg = getTwinCtg ( ctg );
- bal_toCtg = getTwinCtg ( toCtg );
- cnt = add1Connect ( ctg, toCtg, gap, wt, 0 );
- bal_cnt = add1Connect ( bal_toCtg, bal_ctg, gap, wt, 0 );
-
- if ( cnt && insertS > 1000 )
- {
- cnt->newIns = bal_cnt->newIns = 1;
- }
-
- counter++;
-
- if ( contig_array[ctg].mask || contig_array[toCtg].mask )
- { maskCounter++; }
- }
-
- fprintf ( stderr, "***************************\nFor insert size: %d\n", insertS );
- fprintf ( stderr, " Total PE links %d\n", counter );
- fprintf ( stderr, " PE links to masked contigs %d\n", maskCounter );
- fprintf ( stderr, " On same scaffold PE links %d\n", onScafCounter );
- return counter;
+    /*
+     * Load the link records for one insert size from an open links file.
+     *
+     * Each record is "ctg toCtg gap weight insertSize". `line` may already
+     * hold a record left over from the previous call; it is consumed first.
+     * Reading stops at end of file or at the first record whose insert size
+     * is larger than insertS; that record stays in `line` for the next call.
+     * Every accepted record adds a connection and its twin-side counterpart.
+     *
+     * fp      - links file, positioned after the last record already read.
+     * insertS - insert size to load.
+     * line    - caller-owned buffer of at least lineLen bytes.
+     *
+     * Returns the number of records loaded. Records that do not parse into
+     * five fields, or whose contig ids fall outside 1..num_ctg, are skipped
+     * and not counted.
+     */
+    unsigned int ctg, bal_ctg, toCtg, bal_toCtg;
+    int gap, wt, ins;
+    unsigned int counter = 0, onScafCounter = 0;
+    unsigned int maskCounter = 0;
+    CONNECT *cnt, *bal_cnt;
+
+    /* Pending record from the previous call; %u matches the unsigned ids. */
+    if ( strlen ( line )
+            && sscanf ( line, "%u %u %d %d %d", &ctg, &toCtg, &gap, &wt, &ins ) == 5 )
+    {
+        if ( ins != insertS )
+        {
+            return counter;
+        }
+
+        if ( ctg >= 1 && ctg <= num_ctg && toCtg >= 1 && toCtg <= num_ctg )
+        {
+            bal_ctg = getTwinCtg ( ctg );
+            bal_toCtg = getTwinCtg ( toCtg );
+            cnt = add1Connect ( ctg, toCtg, gap, wt, 0 );
+            bal_cnt = add1Connect ( bal_toCtg, bal_ctg, gap, wt, 0 );
+
+            if ( insertS > 1000 )
+            {
+                /* Each pointer is checked on its own: either call may return NULL. */
+                if ( cnt )
+                {
+                    cnt->newIns = 1;
+                }
+
+                if ( cnt && bal_cnt )
+                {
+                    bal_cnt->newIns = 1;
+                }
+            }
+
+            counter++;
+
+            if ( contig_array[ctg].mask || contig_array[toCtg].mask )
+            {
+                maskCounter++;
+            }
+
+            if ( insertS > 1000 &&
+                    contig_array[ctg].from_vt == contig_array[toCtg].from_vt && /* on the same scaff */
+                    contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf )
+            {
+                add1LongPEcov ( ctg, toCtg, wt );
+                onScafCounter++;
+            }
+        }
+    }
+
+    while ( fgets ( line, lineLen, fp ) != NULL )
+    {
+        if ( sscanf ( line, "%u %u %d %d %d", &ctg, &toCtg, &gap, &wt, &ins ) != 5 )
+        {
+            continue;   /* malformed record: nothing trustworthy to load */
+        }
+
+        if ( ins > insertS )
+        {
+            break;
+        }
+
+        /* The ids index contig_array below, so reject out-of-range values. */
+        if ( ctg < 1 || ctg > num_ctg || toCtg < 1 || toCtg > num_ctg )
+        {
+            continue;
+        }
+
+        if ( insertS > 1000 &&
+                contig_array[ctg].from_vt == contig_array[toCtg].from_vt && /* on the same scaff */
+                contig_array[ctg].indexInScaf < contig_array[toCtg].indexInScaf )
+        {
+            add1LongPEcov ( ctg, toCtg, wt );
+            onScafCounter++;
+        }
+
+        bal_ctg = getTwinCtg ( ctg );
+        bal_toCtg = getTwinCtg ( toCtg );
+        cnt = add1Connect ( ctg, toCtg, gap, wt, 0 );
+        bal_cnt = add1Connect ( bal_toCtg, bal_ctg, gap, wt, 0 );
+
+        if ( insertS > 1000 )
+        {
+            if ( cnt )
+            {
+                cnt->newIns = 1;
+            }
+
+            if ( cnt && bal_cnt )
+            {
+                bal_cnt->newIns = 1;
+            }
+        }
+
+        counter++;
+
+        if ( contig_array[ctg].mask || contig_array[toCtg].mask )
+        {
+            maskCounter++;
+        }
+    }
+
+    /* The counters are unsigned, so they are printed with %u. */
+    fprintf ( stderr, "***************************\nFor insert size: %d\n", insertS );
+    fprintf ( stderr, " Total PE links %u\n", counter );
+    fprintf ( stderr, " PE links to masked contigs %u\n", maskCounter );
+    fprintf ( stderr, " On same scaffold PE links %u\n", onScafCounter );
+    return counter;
}
/*************************************************
@@ -5276,163 +5762,179 @@ Output:
Return:
None.
*************************************************/
-void Links2Scaf ( char * infile )
+void Links2Scaf ( char *infile )
{
- char name[256], *line;
- FILE * fp;
- int i, j = 1, lib_n = 0, cutoff_sum = 0;
- int flag = 0, flag2;
- boolean downS, nonLinear = 0, smallPE = 0, isPrevSmall = 0, markSmall;
-
- if ( cvg4SNP > 0.001 )
- {
- sprintf ( name, "%s.bubbleInScaff", infile );
- snp_fp = ckopen ( name, "w" );
- }
-
- cvg4SNP = ( double ) ( cvg4SNP * cvgAvg );
-
- if ( !pes )
- { loadPEgrads ( infile ); }
-
- sprintf ( name, "%s.links", infile );
- fp = ckopen ( name, "r" );
- createCntMemManager();
- createCntLookupTable();
- lineLen = 1024;
- line = ( char * ) ckalloc ( lineLen * sizeof ( char ) );
- fgets ( line, lineLen, fp );
- line[0] = '\0';
- solidArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- tempArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
- gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
- weakPE = 3;
- fprintf ( stderr, "\n" );
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- if ( MinWeakCut == 0 && i == 0 )
- { MinWeakCut = pes[i].pair_num_cut; }
-
- if ( pes[i].insertS < 1000 )
- {
- isPrevSmall = 1;
-
- if ( MinWeakCut > pes[i].pair_num_cut )
- { MinWeakCut = pes[i].pair_num_cut; }
- }
- else if ( pes[i].insertS > 1000 && isPrevSmall )
- {
- smallScaf();
- isPrevSmall = 0;
- }
-
- Insert_size = pes[i].insertS;
- discardCntCounter = 0;
- flag2 = inputLinks ( fp, pes[i].insertS, line );
-
- if ( flag2 )
- {
- lib_n++;
- cutoff_sum += pes[i].pair_num_cut;
- }
-
- flag += flag2;
-
- if ( !flag )
- {
- fprintf ( stderr, "\n" );
- continue;
- }
-
- if ( i == gradsCounter - 1 || pes[i + 1].rank != pes[i].rank )
- {
- flag = nonLinear = downS = markSmall = 0;
-
- if ( pes[i].insertS > 1000 && pes[i].rank > 1 )
- { downS = 1; }
-
- if ( pes[i].insertS <= 1000 )
- { smallPE = 1; }
-
- if ( pes[i].insertS >= 1000 )
- {
- ins_size_var = 50;
- OverlapPercent = 0.05;
- }
- else if ( pes[i].insertS >= 300 )
- {
- ins_size_var = 30;
- OverlapPercent = 0.05;
- }
- else
- {
- ins_size_var = 20;
- OverlapPercent = 0.05;
- }
-
- if ( pes[i].insertS > 1000 )
- { weakPE = 5; }
-
- bySmall = Insert_size > 1000 ? 0 : 1;
-
- if ( lib_n > 0 )
- {
- weakPE = weakPE < cutoff_sum / lib_n ? cutoff_sum / lib_n : weakPE;
- lib_n = cutoff_sum = 0;
- }
-
- if ( MinWeakCut > weakPE )
- { MinWeakCut = weakPE; }
-
- fprintf ( stderr, "Cutoff of PE links to make a reliable connection: %d\n", weakPE );
-
- if ( i == gradsCounter - 1 )
- { nonLinear = 1; }
-
- if ( Insert_size > 1000 )
- {
- detectBreakScaff();
- }
-
- ordering ( 1, downS, nonLinear, infile );
-
- if ( i == gradsCounter - 1 )
- {
- recoverMask();
- }
- else
- {
- scaffold_count ( j, 100 );
- j++;
- fprintf ( stderr, "\n" );
- }
-
- if ( Insert_size > 1000 && i != gradsCounter - 1 )
- {
- clearNewInsFlag();
- }
- }
- }
-
- freeDarray ( tempArray );
- freeDarray ( solidArray );
- freeDarray ( scaf3 );
- freeDarray ( scaf5 );
- freeDarray ( gap3 );
- freeDarray ( gap5 );
- free ( ( void * ) line );
- fclose ( fp );
-
- if ( cvg4SNP > 0.001 )
- {
- fclose ( snp_fp );
- }
-
- fprintf ( stderr, "\nAll links loaded.\n" );
+ char name[256], *line;
+ FILE *fp;
+ int i, j = 1, lib_n = 0, cutoff_sum = 0;
+ int flag = 0, flag2;
+ boolean downS, nonLinear = 0, smallPE = 0, isPrevSmall = 0, markSmall;
+
+ if ( cvg4SNP > 0.001 )
+ {
+ sprintf ( name, "%s.bubbleInScaff", infile );
+ snp_fp = ckopen ( name, "w" );
+ }
+
+ cvg4SNP = ( double ) ( cvg4SNP * cvgAvg );
+
+ if ( !pes )
+ {
+ loadPEgrads ( infile );
+ }
+
+ sprintf ( name, "%s.links", infile );
+ fp = ckopen ( name, "r" );
+ createCntMemManager();
+ createCntLookupTable();
+ lineLen = 1024;
+ line = ( char * ) ckalloc ( lineLen * sizeof ( char ) );
+ fgets ( line, lineLen, fp );
+ line[0] = '\0';
+ solidArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ tempArray = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ scaf3 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ scaf5 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ gap3 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+ gap5 = ( DARRAY * ) createDarray ( 1000, sizeof ( int ) );
+ weakPE = 3;
+ fprintf ( stderr, "\n" );
+
+ for ( i = 0; i < gradsCounter; i++ )
+ {
+ if ( MinWeakCut == 0 && i == 0 )
+ {
+ MinWeakCut = pes[i].pair_num_cut;
+ }
+
+ if ( pes[i].insertS < 1000 )
+ {
+ isPrevSmall = 1;
+
+ if ( MinWeakCut > pes[i].pair_num_cut )
+ {
+ MinWeakCut = pes[i].pair_num_cut;
+ }
+ }
+ else if ( pes[i].insertS > 1000 && isPrevSmall )
+ {
+ smallScaf();
+ isPrevSmall = 0;
+ }
+
+ Insert_size = pes[i].insertS;
+ discardCntCounter = 0;
+ flag2 = inputLinks ( fp, pes[i].insertS, line );
+
+ if ( flag2 )
+ {
+ lib_n++;
+ cutoff_sum += pes[i].pair_num_cut;
+ }
+
+ flag += flag2;
+
+ if ( !flag )
+ {
+ fprintf ( stderr, "\n" );
+ continue;
+ }
+
+ if ( i == gradsCounter - 1 || pes[i + 1].rank != pes[i].rank )
+ {
+ flag = nonLinear = downS = markSmall = 0;
+
+ if ( pes[i].insertS > 1000 && pes[i].rank > 1 )
+ {
+ downS = 1;
+ }
+
+ if ( pes[i].insertS <= 1000 )
+ {
+ smallPE = 1;
+ }
+
+ if ( pes[i].insertS >= 1000 )
+ {
+ ins_size_var = 50;
+ OverlapPercent = 0.05;
+ }
+ else if ( pes[i].insertS >= 300 )
+ {
+ ins_size_var = 30;
+ OverlapPercent = 0.05;
+ }
+ else
+ {
+ ins_size_var = 20;
+ OverlapPercent = 0.05;
+ }
+
+ if ( pes[i].insertS > 1000 )
+ {
+ weakPE = 5;
+ }
+
+ bySmall = Insert_size > 1000 ? 0 : 1;
+
+ if ( lib_n > 0 )
+ {
+ weakPE = weakPE < cutoff_sum / lib_n ? cutoff_sum / lib_n : weakPE;
+ lib_n = cutoff_sum = 0;
+ }
+
+ if ( MinWeakCut > weakPE )
+ {
+ MinWeakCut = weakPE;
+ }
+
+ fprintf ( stderr, "Cutoff of PE links to make a reliable connection: %d\n", weakPE );
+
+ if ( i == gradsCounter - 1 )
+ {
+ nonLinear = 1;
+ }
+
+ if ( Insert_size > 1000 )
+ {
+ detectBreakScaff();
+ }
+
+ ordering ( 1, downS, nonLinear, infile );
+
+ if ( i == gradsCounter - 1 )
+ {
+ recoverMask();
+ }
+ else
+ {
+ scaffold_count ( j, 100 );
+ j++;
+ fprintf ( stderr, "\n" );
+ }
+
+ if ( Insert_size > 1000 && i != gradsCounter - 1 )
+ {
+ clearNewInsFlag();
+ }
+ }
+ }
+
+ freeDarray ( tempArray );
+ freeDarray ( solidArray );
+ freeDarray ( scaf3 );
+ freeDarray ( scaf5 );
+ freeDarray ( gap3 );
+ freeDarray ( gap5 );
+ free ( ( void * ) line );
+ fclose ( fp );
+
+ if ( cvg4SNP > 0.001 )
+ {
+ fclose ( snp_fp );
+ }
+
+ fprintf ( stderr, "\nAll links loaded.\n" );
}
/*************************************************
@@ -5453,25 +5955,31 @@ Return:
*************************************************/
static boolean putNodeInArray ( unsigned int node, int maxNodes, int dis )
{
- if ( contig_array[node].inSubGraph )
- { return 1; }
-
- int index = nodeCounter;
-
- if ( index > maxNodes )
- { return 0; }
-
- if ( contig_array[getTwinCtg ( node )].inSubGraph )
- { return 0; }
-
- ctg4heapArray[index].ctgID = node;
- ctg4heapArray[index].dis = dis;
- contig_array[node].inSubGraph = 1;
- ctg4heapArray[index].ds_shut4dheap = 0;
- ctg4heapArray[index].us_shut4dheap = 0;
- ctg4heapArray[index].ds_shut4uheap = 0;
- ctg4heapArray[index].us_shut4uheap = 0;
- return 1;
+ if ( contig_array[node].inSubGraph )
+ {
+ return 1;
+ }
+
+ int index = nodeCounter;
+
+ if ( index > maxNodes )
+ {
+ return 0;
+ }
+
+ if ( contig_array[getTwinCtg ( node )].inSubGraph )
+ {
+ return 0;
+ }
+
+ ctg4heapArray[index].ctgID = node;
+ ctg4heapArray[index].dis = dis;
+ contig_array[node].inSubGraph = 1;
+ ctg4heapArray[index].ds_shut4dheap = 0;
+ ctg4heapArray[index].us_shut4dheap = 0;
+ ctg4heapArray[index].ds_shut4uheap = 0;
+ ctg4heapArray[index].us_shut4uheap = 0;
+ return 1;
}
/*************************************************
@@ -5488,17 +5996,19 @@ Return:
*************************************************/
static void setInGraph ( boolean flag )
{
- int i;
- int node;
- nodeCounter = nodeCounter > MaxNodeInSub ? MaxNodeInSub : nodeCounter;
-
- for ( i = 1; i <= nodeCounter; i++ )
- {
- node = ctg4heapArray[i].ctgID;
-
- if ( node > 0 )
- { contig_array[node].inSubGraph = flag; }
- }
+ int i;
+ int node;
+ nodeCounter = nodeCounter > MaxNodeInSub ? MaxNodeInSub : nodeCounter;
+
+ for ( i = 1; i <= nodeCounter; i++ )
+ {
+ node = ctg4heapArray[i].ctgID;
+
+ if ( node > 0 )
+ {
+ contig_array[node].inSubGraph = flag;
+ }
+ }
}
@@ -5517,15 +6027,17 @@ Return:
*************************************************/
static int getIndexInArr ( const unsigned int node )
{
- int i = 1;
+ int i = 1;
- for ( ; i <= nodeCounter; ++i )
- {
- if ( node == ctg4heapArray[i].ctgID )
- { return i; }
- }
+ for ( ; i <= nodeCounter; ++i )
+ {
+ if ( node == ctg4heapArray[i].ctgID )
+ {
+ return i;
+ }
+ }
- return 0;
+ return 0;
}
@@ -5555,43 +6067,51 @@ Return:
0 otherwise.
*************************************************/
static boolean dispatch1node ( int dis, unsigned int tempNode, int maxNodes,
- FibHeap * dheap, FibHeap * uheap, int * DmaxDis, int * UmaxDis )
+ FibHeap *dheap, FibHeap *uheap, int *DmaxDis, int *UmaxDis )
{
- boolean ret;
+ boolean ret;
- if ( dis >= 0 ) // put it to Dheap
- {
- nodeCounter++;
- ret = putNodeInArray ( tempNode, maxNodes, dis );
+ if ( dis >= 0 ) // put it to Dheap
+ {
+ nodeCounter++;
+ ret = putNodeInArray ( tempNode, maxNodes, dis );
- if ( !ret )
- { return 0; }
+ if ( !ret )
+ {
+ return 0;
+ }
- insertNodeIntoHeap ( dheap, dis, nodeCounter );
+ insertNodeIntoHeap ( dheap, dis, nodeCounter );
- if ( dis > *DmaxDis )
- { *DmaxDis = dis; }
+ if ( dis > *DmaxDis )
+ {
+ *DmaxDis = dis;
+ }
- return 1;
- }
- else // put it to Uheap
- {
- nodeCounter++;
- ret = putNodeInArray ( tempNode, maxNodes, dis );
+ return 1;
+ }
+ else // put it to Uheap
+ {
+ nodeCounter++;
+ ret = putNodeInArray ( tempNode, maxNodes, dis );
- if ( !ret )
- { return 0; }
+ if ( !ret )
+ {
+ return 0;
+ }
- insertNodeIntoHeap ( uheap, -dis, nodeCounter );
- int temp_len = contig_array[tempNode].length;
+ insertNodeIntoHeap ( uheap, -dis, nodeCounter );
+ int temp_len = contig_array[tempNode].length;
- if ( -dis + temp_len > *UmaxDis )
- { *UmaxDis = -dis + contig_array[tempNode].length; }
+ if ( -dis + temp_len > *UmaxDis )
+ {
+ *UmaxDis = -dis + contig_array[tempNode].length;
+ }
- return -1;
- }
+ return -1;
+ }
- return 0;
+ return 0;
}
/*************************************************
@@ -5611,10 +6131,14 @@ Return:
*************************************************/
static boolean canDheapWait ( unsigned int currNode, int dis, int DmaxDis )
{
- if ( dis < DmaxDis )
- { return 0; }
- else
- { return 1; }
+ if ( dis < DmaxDis )
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
/*************************************************
@@ -5648,98 +6172,112 @@ Output:
Return:
0 if operation of putting contig into sub-graph failed.
*************************************************/
-static boolean workOnDheap ( FibHeap * dheap, FibHeap * uheap, boolean * Dwait, boolean * Uwait,
- int * DmaxDis, int * UmaxDis, int maxNodes )
+static boolean workOnDheap ( FibHeap *dheap, FibHeap *uheap, boolean *Dwait, boolean *Uwait,
+ int *DmaxDis, int *UmaxDis, int maxNodes )
{
- if ( *Dwait )
- { return 1; }
-
- unsigned int currNode, twin, tempNode;
- CTGinHEAP * ctgInHeap;
- int indexInArray;
- CONNECT * us_cnt, *ds_cnt;
- int dis0, dis;
- boolean ret, isEmpty;
-
- while ( ( indexInArray = removeNextNodeFromHeap ( dheap ) ) != 0 )
- {
- ctgInHeap = &ctg4heapArray[indexInArray];
- currNode = ctgInHeap->ctgID;
- dis0 = ctgInHeap->dis;
- isEmpty = IsHeapEmpty ( dheap );
- twin = getTwinCtg ( currNode );
- us_cnt = ctgInHeap->us_shut4dheap ? NULL : contig_array[twin].downwardConnect;
-
- while ( us_cnt )
- {
- if ( us_cnt->deleted || us_cnt->mask ||
- contig_array[getTwinCtg ( us_cnt->contigID )].inSubGraph )
- {
- us_cnt = us_cnt->next;
- continue;
- }
-
- tempNode = getTwinCtg ( us_cnt->contigID );
-
- if ( contig_array[tempNode].inSubGraph )
- {
- us_cnt = us_cnt->next;
- continue;
- }
-
- dis = dis0 - us_cnt->gapLen - ( int ) contig_array[twin].length;
- ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
-
- if ( ret == 0 )
- { return 0; }
- else if ( ret < 0 )
- { *Uwait = 0; }
-
- us_cnt = us_cnt->next;
- }
-
- if ( nodeCounter > 1 && isEmpty )
- {
- *Dwait = canDheapWait ( currNode, dis0, *DmaxDis );
-
- if ( *Dwait )
- {
- isEmpty = IsHeapEmpty ( dheap );
- insertNodeIntoHeap ( dheap, dis0, indexInArray );
- ctg4heapArray[indexInArray].us_shut4dheap = 1;
-
- if ( isEmpty )
- { return 1; }
- else
- { continue; }
- }
- }
-
- ds_cnt = ctgInHeap->ds_shut4dheap ? NULL : contig_array[currNode].downwardConnect;
-
- while ( ds_cnt )
- {
- if ( ds_cnt->deleted || ds_cnt->mask || contig_array[ds_cnt->contigID].inSubGraph )
- {
- ds_cnt = ds_cnt->next;
- continue;
- }
-
- tempNode = ds_cnt->contigID;
- dis = dis0 + ds_cnt->gapLen + ( int ) contig_array[tempNode].length;
- ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
-
- if ( ret == 0 )
- { return 0; }
- else if ( ret < 0 )
- { *Uwait = 0; }
-
- ds_cnt = ds_cnt->next;
- } // for each downstream connections
- } // for each node comes off the heap
-
- *Dwait = 1;
- return 1;
+ if ( *Dwait )
+ {
+ return 1;
+ }
+
+ unsigned int currNode, twin, tempNode;
+ CTGinHEAP *ctgInHeap;
+ int indexInArray;
+ CONNECT *us_cnt, *ds_cnt;
+ int dis0, dis;
+ boolean ret, isEmpty;
+
+ while ( ( indexInArray = removeNextNodeFromHeap ( dheap ) ) != 0 )
+ {
+ ctgInHeap = &ctg4heapArray[indexInArray];
+ currNode = ctgInHeap->ctgID;
+ dis0 = ctgInHeap->dis;
+ isEmpty = IsHeapEmpty ( dheap );
+ twin = getTwinCtg ( currNode );
+ us_cnt = ctgInHeap->us_shut4dheap ? NULL : contig_array[twin].downwardConnect;
+
+ while ( us_cnt )
+ {
+ if ( us_cnt->deleted || us_cnt->mask ||
+ contig_array[getTwinCtg ( us_cnt->contigID )].inSubGraph )
+ {
+ us_cnt = us_cnt->next;
+ continue;
+ }
+
+ tempNode = getTwinCtg ( us_cnt->contigID );
+
+ if ( contig_array[tempNode].inSubGraph )
+ {
+ us_cnt = us_cnt->next;
+ continue;
+ }
+
+ dis = dis0 - us_cnt->gapLen - ( int ) contig_array[twin].length;
+ ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
+
+ if ( ret == 0 )
+ {
+ return 0;
+ }
+ else if ( ret < 0 )
+ {
+ *Uwait = 0;
+ }
+
+ us_cnt = us_cnt->next;
+ }
+
+ if ( nodeCounter > 1 && isEmpty )
+ {
+ *Dwait = canDheapWait ( currNode, dis0, *DmaxDis );
+
+ if ( *Dwait )
+ {
+ isEmpty = IsHeapEmpty ( dheap );
+ insertNodeIntoHeap ( dheap, dis0, indexInArray );
+ ctg4heapArray[indexInArray].us_shut4dheap = 1;
+
+ if ( isEmpty )
+ {
+ return 1;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ }
+
+ ds_cnt = ctgInHeap->ds_shut4dheap ? NULL : contig_array[currNode].downwardConnect;
+
+ while ( ds_cnt )
+ {
+ if ( ds_cnt->deleted || ds_cnt->mask || contig_array[ds_cnt->contigID].inSubGraph )
+ {
+ ds_cnt = ds_cnt->next;
+ continue;
+ }
+
+ tempNode = ds_cnt->contigID;
+ dis = dis0 + ds_cnt->gapLen + ( int ) contig_array[tempNode].length;
+ ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
+
+ if ( ret == 0 )
+ {
+ return 0;
+ }
+ else if ( ret < 0 )
+ {
+ *Uwait = 0;
+ }
+
+ ds_cnt = ds_cnt->next;
+ } // for each downstream connections
+ } // for each node comes off the heap
+
+ *Dwait = 1;
+ return 1;
}
/*************************************************
@@ -5759,12 +6297,16 @@ Return:
*************************************************/
static boolean canUheapWait ( unsigned int currNode, int dis, int UmaxDis )
{
- int temp_len = contig_array[currNode].length;
-
- if ( -dis + temp_len < UmaxDis )
- { return 0; }
- else
- { return 1; }
+ int temp_len = contig_array[currNode].length;
+
+ if ( -dis + temp_len < UmaxDis )
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
/*************************************************
@@ -5796,98 +6338,112 @@ Output:
Return:
0 if operation of putting contig into sub-graph failed.
*************************************************/
-static boolean workOnUheap ( FibHeap * dheap, FibHeap * uheap, boolean * Dwait, boolean * Uwait,
- int * DmaxDis, int * UmaxDis, int maxNodes )
+static boolean workOnUheap ( FibHeap *dheap, FibHeap *uheap, boolean *Dwait, boolean *Uwait,
+ int *DmaxDis, int *UmaxDis, int maxNodes )
{
- if ( *Uwait )
- { return 1; }
-
- unsigned int currNode, twin, tempNode;
- CTGinHEAP * ctgInHeap;
- int indexInArray;
- CONNECT * us_cnt, *ds_cnt;
- int dis0, dis;
- boolean ret, isEmpty;
-
- while ( ( indexInArray = removeNextNodeFromHeap ( uheap ) ) != 0 )
- {
- ctgInHeap = &ctg4heapArray[indexInArray];
- currNode = ctgInHeap->ctgID;
- dis0 = ctgInHeap->dis;
- isEmpty = IsHeapEmpty ( uheap );
- ds_cnt = ctgInHeap->ds_shut4uheap ? NULL : contig_array[currNode].downwardConnect;
-
- while ( ds_cnt )
- {
- if ( ds_cnt->deleted || ds_cnt->mask || contig_array[ds_cnt->contigID].inSubGraph )
- {
- ds_cnt = ds_cnt->next;
- continue;
- }
-
- tempNode = ds_cnt->contigID;
- dis = dis0 + ds_cnt->gapLen + contig_array[tempNode].length;
- ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
-
- if ( ret == 0 )
- { return 0; }
- else if ( ret > 0 )
- { *Dwait = 0; }
-
- ds_cnt = ds_cnt->next;
- } // for each downstream connections
-
- if ( nodeCounter > 1 && isEmpty )
- {
- *Uwait = canUheapWait ( currNode, dis0, *UmaxDis );
-
- if ( *Uwait )
- {
- isEmpty = IsHeapEmpty ( uheap );
- insertNodeIntoHeap ( uheap, dis0, indexInArray );
- ctg4heapArray[indexInArray].ds_shut4uheap = 1;
-
- if ( isEmpty )
- { return 1; }
- else
- { continue; }
- }
- }
-
- twin = getTwinCtg ( currNode );
- us_cnt = ctgInHeap->us_shut4uheap ? NULL : contig_array[twin].downwardConnect;
-
- while ( us_cnt )
- {
- if ( us_cnt->deleted || us_cnt->mask ||
- contig_array[getTwinCtg ( us_cnt->contigID )].inSubGraph )
- {
- us_cnt = us_cnt->next;
- continue;
- }
-
- tempNode = getTwinCtg ( us_cnt->contigID );
-
- if ( contig_array[tempNode].inSubGraph )
- {
- us_cnt = us_cnt->next;
- continue;
- }
-
- dis = dis0 - us_cnt->gapLen - contig_array[twin].length;
- ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
-
- if ( ret == 0 )
- { return 0; }
- else if ( ret > 0 )
- { *Dwait = 0; }
-
- us_cnt = us_cnt->next;
- }
- } // for each node comes off the heap
-
- *Uwait = 1;
- return 1;
+ if ( *Uwait )
+ {
+ return 1;
+ }
+
+ unsigned int currNode, twin, tempNode;
+ CTGinHEAP *ctgInHeap;
+ int indexInArray;
+ CONNECT *us_cnt, *ds_cnt;
+ int dis0, dis;
+ boolean ret, isEmpty;
+
+ while ( ( indexInArray = removeNextNodeFromHeap ( uheap ) ) != 0 )
+ {
+ ctgInHeap = &ctg4heapArray[indexInArray];
+ currNode = ctgInHeap->ctgID;
+ dis0 = ctgInHeap->dis;
+ isEmpty = IsHeapEmpty ( uheap );
+ ds_cnt = ctgInHeap->ds_shut4uheap ? NULL : contig_array[currNode].downwardConnect;
+
+ while ( ds_cnt )
+ {
+ if ( ds_cnt->deleted || ds_cnt->mask || contig_array[ds_cnt->contigID].inSubGraph )
+ {
+ ds_cnt = ds_cnt->next;
+ continue;
+ }
+
+ tempNode = ds_cnt->contigID;
+ dis = dis0 + ds_cnt->gapLen + contig_array[tempNode].length;
+ ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
+
+ if ( ret == 0 )
+ {
+ return 0;
+ }
+ else if ( ret > 0 )
+ {
+ *Dwait = 0;
+ }
+
+ ds_cnt = ds_cnt->next;
+ } // for each downstream connections
+
+ if ( nodeCounter > 1 && isEmpty )
+ {
+ *Uwait = canUheapWait ( currNode, dis0, *UmaxDis );
+
+ if ( *Uwait )
+ {
+ isEmpty = IsHeapEmpty ( uheap );
+ insertNodeIntoHeap ( uheap, dis0, indexInArray );
+ ctg4heapArray[indexInArray].ds_shut4uheap = 1;
+
+ if ( isEmpty )
+ {
+ return 1;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ }
+
+ twin = getTwinCtg ( currNode );
+ us_cnt = ctgInHeap->us_shut4uheap ? NULL : contig_array[twin].downwardConnect;
+
+ while ( us_cnt )
+ {
+ if ( us_cnt->deleted || us_cnt->mask ||
+ contig_array[getTwinCtg ( us_cnt->contigID )].inSubGraph )
+ {
+ us_cnt = us_cnt->next;
+ continue;
+ }
+
+ tempNode = getTwinCtg ( us_cnt->contigID );
+
+ if ( contig_array[tempNode].inSubGraph )
+ {
+ us_cnt = us_cnt->next;
+ continue;
+ }
+
+ dis = dis0 - us_cnt->gapLen - contig_array[twin].length;
+ ret = dispatch1node ( dis, tempNode, maxNodes, dheap, uheap, DmaxDis, UmaxDis );
+
+ if ( ret == 0 )
+ {
+ return 0;
+ }
+ else if ( ret > 0 )
+ {
+ *Dwait = 0;
+ }
+
+ us_cnt = us_cnt->next;
+ }
+ } // for each node comes off the heap
+
+ *Uwait = 1;
+ return 1;
}
/*************************************************
@@ -5906,56 +6462,56 @@ Return:
*************************************************/
static boolean pickUpGeneralSubgraph ( unsigned int node1, int maxNodes )
{
- FibHeap * Uheap = newFibHeap(); // heap for upstream contigs to node1
- FibHeap * Dheap = newFibHeap();
- int UmaxDis; // max distance upstream to node1
- int DmaxDis;
- boolean Uwait; // wait signal for Uheap
- boolean Dwait;
- int dis;
- boolean ret;
- //initiate: node1 is put to array once, and to both Dheap and Uheap
- dis = 0;
- nodeCounter = 1;
- putNodeInArray ( node1, maxNodes, dis );
- insertNodeIntoHeap ( Dheap, dis, nodeCounter );
- ctg4heapArray[nodeCounter].us_shut4dheap = 1;
- Dwait = 0;
- DmaxDis = 0;
- insertNodeIntoHeap ( Uheap, dis, nodeCounter );
- ctg4heapArray[nodeCounter].ds_shut4uheap = 1;
- Uwait = 1;
- UmaxDis = contig_array[node1].length;
-
- while ( 1 )
- {
- ret = workOnDheap ( Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes );
-
- if ( !ret )
- {
- setInGraph ( 0 );
- destroyHeap ( Dheap );
- destroyHeap ( Uheap );
- return 0;
- }
-
- ret = workOnUheap ( Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes );
-
- if ( !ret )
- {
- setInGraph ( 0 );
- destroyHeap ( Dheap );
- destroyHeap ( Uheap );
- return 0;
- }
-
- if ( Uwait && Dwait )
- {
- destroyHeap ( Dheap );
- destroyHeap ( Uheap );
- return 1;
- }
- }
+ FibHeap *Uheap = newFibHeap(); // heap for upstream contigs to node1
+ FibHeap *Dheap = newFibHeap();
+ int UmaxDis; // max distance upstream to node1
+ int DmaxDis;
+ boolean Uwait; // wait signal for Uheap
+ boolean Dwait;
+ int dis;
+ boolean ret;
+ //initiate: node1 is put to array once, and to both Dheap and Uheap
+ dis = 0;
+ nodeCounter = 1;
+ putNodeInArray ( node1, maxNodes, dis );
+ insertNodeIntoHeap ( Dheap, dis, nodeCounter );
+ ctg4heapArray[nodeCounter].us_shut4dheap = 1;
+ Dwait = 0;
+ DmaxDis = 0;
+ insertNodeIntoHeap ( Uheap, dis, nodeCounter );
+ ctg4heapArray[nodeCounter].ds_shut4uheap = 1;
+ Uwait = 1;
+ UmaxDis = contig_array[node1].length;
+
+ while ( 1 )
+ {
+ ret = workOnDheap ( Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes );
+
+ if ( !ret )
+ {
+ setInGraph ( 0 );
+ destroyHeap ( Dheap );
+ destroyHeap ( Uheap );
+ return 0;
+ }
+
+ ret = workOnUheap ( Dheap, Uheap, &Dwait, &Uwait, &DmaxDis, &UmaxDis, maxNodes );
+
+ if ( !ret )
+ {
+ setInGraph ( 0 );
+ destroyHeap ( Dheap );
+ destroyHeap ( Uheap );
+ return 0;
+ }
+
+ if ( Uwait && Dwait )
+ {
+ destroyHeap ( Dheap );
+ destroyHeap ( Uheap );
+ return 1;
+ }
+ }
}
/*************************************************
@@ -5973,18 +6529,24 @@ Return:
-1 if the second contig was further than the first contig.
0 if the distances were equal.
*************************************************/
-static int cmp_ctg ( const void * a, const void * b )
+static int cmp_ctg ( const void *a, const void *b )
{
- CTGinHEAP * A, *B;
- A = ( CTGinHEAP * ) a;
- B = ( CTGinHEAP * ) b;
-
- if ( A->dis > B->dis )
- { return 1; }
- else if ( A->dis == B->dis )
- { return 0; }
- else
- { return -1; }
+ CTGinHEAP *A, *B;
+ A = ( CTGinHEAP * ) a;
+ B = ( CTGinHEAP * ) b;
+
+ if ( A->dis > B->dis )
+ {
+ return 1;
+ }
+ else if ( A->dis == B->dis )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
/*************************************************
@@ -6009,117 +6571,121 @@ Return:
*************************************************/
static boolean checkEligible()
{
- unsigned int firstNode = ctg4heapArray[1].ctgID;
- unsigned int twin;
- int i;
- boolean flag = 0;
- //check if the first node has incoming link from twin of any node in subgraph
- // or it has multi outgoing links bound to incoming links
- twin = getTwinCtg ( firstNode );
- CONNECT * ite_cnt = contig_array[twin].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( contig_array[ite_cnt->contigID].inSubGraph )
- {
- return 0;
- }
-
- if ( ite_cnt->prevInScaf )
- {
- if ( flag )
- { return 0; }
-
- flag = 1;
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- //check if the last node has outgoing link to twin of any node in subgraph
- // or it has multi outgoing links bound to incoming links
- unsigned int lastNode = ctg4heapArray[nodeCounter].ctgID;
- ite_cnt = contig_array[lastNode].downwardConnect;
- flag = 0;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- twin = getTwinCtg ( ite_cnt->contigID );
-
- if ( contig_array[twin].inSubGraph )
- {
- return 0;
- }
-
- if ( ite_cnt->prevInScaf )
- {
- if ( flag )
- { return 0; }
-
- flag = 1;
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- //check if any node has outgoing link to node outside the subgraph
- for ( i = 1; i < nodeCounter; i++ )
- {
- ite_cnt = contig_array[ctg4heapArray[i].ctgID].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( !contig_array[ite_cnt->contigID].inSubGraph )
- {
- return 0;
- }
-
- ite_cnt = ite_cnt->next;
- }
- }
-
- //check if any node has incoming link from node outside the subgraph
- for ( i = 2; i <= nodeCounter; i++ )
- {
- twin = getTwinCtg ( ctg4heapArray[i].ctgID );
- ite_cnt = contig_array[twin].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( !contig_array[getTwinCtg ( ite_cnt->contigID )].inSubGraph )
- {
- return 0;
- }
-
- ite_cnt = ite_cnt->next;
- }
- }
-
- return 1;
+ unsigned int firstNode = ctg4heapArray[1].ctgID;
+ unsigned int twin;
+ int i;
+ boolean flag = 0;
+ //check if the first node has incoming link from twin of any node in subgraph
+ // or it has multi outgoing links bound to incoming links
+ twin = getTwinCtg ( firstNode );
+ CONNECT *ite_cnt = contig_array[twin].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( contig_array[ite_cnt->contigID].inSubGraph )
+ {
+ return 0;
+ }
+
+ if ( ite_cnt->prevInScaf )
+ {
+ if ( flag )
+ {
+ return 0;
+ }
+
+ flag = 1;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ //check if the last node has outgoing link to twin of any node in subgraph
+ // or it has multi outgoing links bound to incoming links
+ unsigned int lastNode = ctg4heapArray[nodeCounter].ctgID;
+ ite_cnt = contig_array[lastNode].downwardConnect;
+ flag = 0;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ twin = getTwinCtg ( ite_cnt->contigID );
+
+ if ( contig_array[twin].inSubGraph )
+ {
+ return 0;
+ }
+
+ if ( ite_cnt->prevInScaf )
+ {
+ if ( flag )
+ {
+ return 0;
+ }
+
+ flag = 1;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ //check if any node has outgoing link to node outside the subgraph
+ for ( i = 1; i < nodeCounter; i++ )
+ {
+ ite_cnt = contig_array[ctg4heapArray[i].ctgID].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( !contig_array[ite_cnt->contigID].inSubGraph )
+ {
+ return 0;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ //check if any node has incoming link from node outside the subgraph
+ for ( i = 2; i <= nodeCounter; i++ )
+ {
+ twin = getTwinCtg ( ctg4heapArray[i].ctgID );
+ ite_cnt = contig_array[twin].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( !contig_array[getTwinCtg ( ite_cnt->contigID )].inSubGraph )
+ {
+ return 0;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ return 1;
}
/*************************************************
@@ -6135,14 +6701,14 @@ Output:
Return:
None.
*************************************************/
-static void arrayvalue ( CTGinHEAP * init_array, CTGinHEAP * value_array )
+static void arrayvalue ( CTGinHEAP *init_array, CTGinHEAP *value_array )
{
- init_array->ctgID = value_array->ctgID;
- init_array->dis = value_array->dis;
- init_array->ds_shut4dheap = value_array->ds_shut4dheap;
- init_array->ds_shut4uheap = value_array->ds_shut4uheap;
- init_array->us_shut4dheap = value_array->us_shut4dheap;
- init_array->us_shut4uheap = value_array->us_shut4uheap;
+ init_array->ctgID = value_array->ctgID;
+ init_array->dis = value_array->dis;
+ init_array->ds_shut4dheap = value_array->ds_shut4dheap;
+ init_array->ds_shut4uheap = value_array->ds_shut4uheap;
+ init_array->us_shut4dheap = value_array->us_shut4dheap;
+ init_array->us_shut4uheap = value_array->us_shut4uheap;
}
/*************************************************
@@ -6160,95 +6726,109 @@ Return:
*************************************************/
static void arrayexchange ( unsigned int from_id, unsigned int to_id )
{
- CTGinHEAP tmp_h;
- arrayvalue ( &tmp_h, & ( ctg4heapArray[from_id] ) );
- arrayvalue ( & ( ctg4heapArray[from_id] ), & ( ctg4heapArray[to_id] ) );
- arrayvalue ( & ( ctg4heapArray[to_id] ), &tmp_h );
+ CTGinHEAP tmp_h;
+ arrayvalue ( &tmp_h, & ( ctg4heapArray[from_id] ) );
+ arrayvalue ( & ( ctg4heapArray[from_id] ), & ( ctg4heapArray[to_id] ) );
+ arrayvalue ( & ( ctg4heapArray[to_id] ), &tmp_h );
}
static void deletearray ( unsigned int id )
{
- int i;
+ int i;
- for ( i = 1; i < nodeCounter; i++ )
- {
- if ( i >= id )
- {
- arrayvalue ( & ( ctg4heapArray[i] ), & ( ctg4heapArray[i + 1] ) );
- }
- }
+ for ( i = 1; i < nodeCounter; i++ )
+ {
+ if ( i >= id )
+ {
+ arrayvalue ( & ( ctg4heapArray[i] ), & ( ctg4heapArray[i + 1] ) );
+ }
+ }
- nodeCounter--;
+ nodeCounter--;
}
int getnextInScafCtg ( int id, int mask, int flag )
{
- CONNECT * tmp_cn = contig_array[id].downwardConnect;
- int currId = 0;
-
- while ( tmp_cn )
- {
- if ( tmp_cn->prevInScaf )
- {
- currId = tmp_cn->contigID;
-
- if ( mask != 0 && tmp_cn->contigID != mask )
- { break; }
- }
-
- tmp_cn = tmp_cn->next;
- }
-
- if ( mask == currId && mask != 0 )
- { currId = 0; }
-
- if ( flag == 0 && currId != 0 )
- { currId = getTwinCtg ( currId ); }
-
- return currId;
+ CONNECT *tmp_cn = contig_array[id].downwardConnect;
+ int currId = 0;
+
+ while ( tmp_cn )
+ {
+ if ( tmp_cn->prevInScaf )
+ {
+ currId = tmp_cn->contigID;
+
+ if ( mask != 0 && tmp_cn->contigID != mask )
+ {
+ break;
+ }
+ }
+
+ tmp_cn = tmp_cn->next;
+ }
+
+ if ( mask == currId && mask != 0 )
+ {
+ currId = 0;
+ }
+
+ if ( flag == 0 && currId != 0 )
+ {
+ currId = getTwinCtg ( currId );
+ }
+
+ return currId;
}
void delete_PrevNext ( int i, int flag )
{
- int id = ctg4heapArray[i].ctgID;
- int pid = getTwinCtg ( id );
- CONNECT * ite_cnt = contig_array[id].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( flag == 1 )
- { setNextInScaf ( ite_cnt, NULL ); }
-
- if ( flag == 0 )
- { setPrevInScaf ( ite_cnt, 0 ); }
-
- ite_cnt = ite_cnt->next;
- }
-
- ite_cnt = contig_array[pid].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( flag == 0 )
- { setNextInScaf ( ite_cnt, NULL ); }
-
- if ( flag == 1 )
- { setPrevInScaf ( ite_cnt, 0 ); }
-
- ite_cnt = ite_cnt->next;
- }
+ int id = ctg4heapArray[i].ctgID;
+ int pid = getTwinCtg ( id );
+ CONNECT *ite_cnt = contig_array[id].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( flag == 1 )
+ {
+ setNextInScaf ( ite_cnt, NULL );
+ }
+
+ if ( flag == 0 )
+ {
+ setPrevInScaf ( ite_cnt, 0 );
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ ite_cnt = contig_array[pid].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( flag == 0 )
+ {
+ setNextInScaf ( ite_cnt, NULL );
+ }
+
+ if ( flag == 1 )
+ {
+ setPrevInScaf ( ite_cnt, 0 );
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
}
/*************************************************
@@ -6266,31 +6846,31 @@ Output:
Return:
Found downstream contig number.
*************************************************/
-static int getCntNodes ( unsigned int node, unsigned int * nodeArray, unsigned int * gapArray )
+static int getCntNodes ( unsigned int node, unsigned int *nodeArray, unsigned int *gapArray ) /* copies the accepted downward connections of node into nodeArray/gapArray and returns how many were stored */
{
- int count = 0;
- CONNECT * cnt = contig_array[node].downwardConnect;
+ int count = 0;
+ CONNECT *cnt = contig_array[node].downwardConnect;
- while ( cnt )
- {
- if ( 0 == bySmall && cnt->weight < 3 && !cnt->smallIns && !cnt->bySmall )
- {
- cnt = cnt->next;
- continue;
- }
+ while ( cnt )
+ {
+ if ( 0 == bySmall && cnt->weight < 3 && !cnt->smallIns && !cnt->bySmall ) /* while global bySmall is 0, skip edges with weight < 3 that have neither smallIns nor bySmall set */
+ {
+ cnt = cnt->next;
+ continue;
+ }
- nodeArray[count] = cnt->contigID;
- gapArray[count++] = cnt->gapLen;
+ nodeArray[count] = cnt->contigID;
+ gapArray[count++] = cnt->gapLen; /* NOTE(review): stored through unsigned int *, while calGapLen reads its gap array as int * -- confirm the array element type */
- if ( count == MaxCntNode )
- {
- break;
- }
+ if ( count == MaxCntNode ) /* stop after MaxCntNode entries; assumes both arrays have at least MaxCntNode slots -- TODO confirm */
+ {
+ break;
+ }
- cnt = cnt->next;
- }
+ cnt = cnt->next;
+ }
- return count;
+ return count;
}
/*************************************************
@@ -6312,35 +6892,35 @@ Output:
Return:
Accumulated distance between these two contigs.
*************************************************/
-static int calGapLen ( int * cntCounter, unsigned int * cntNodeArr, int * cntGapArr,
+static int calGapLen ( int *cntCounter, unsigned int *cntNodeArr, int *cntGapArr, /* sums a gap estimate over the candidates in cntNodeArr that tmpNode has an accepted edge to; *cntCounter is overwritten with how many contributed */
unsigned int node1, unsigned int node2, unsigned int tmpNode )
{
- int i = 0, gapLen = 0, count = 0;
- unsigned int target_node;
- CONNECT * cnt;
- int len = contig_array[node2].length;
-
- for ( ; i < *cntCounter; ++i )
- {
- cnt = getCntBetween ( tmpNode, cntNodeArr[i] );
-
- if ( cnt && ( cnt->weight >= 3 || bySmall || cnt->smallIns || cnt->bySmall ) )
- {
- if ( tmpNode == node1 )
- {
- gapLen += cnt->gapLen - cntGapArr[i] - len;
- }
- else
- {
- gapLen += cntGapArr[i] - cnt->gapLen - len;
- }
-
- ++count;
- }
- }
-
- *cntCounter = count;
- return gapLen;
+ int i = 0, gapLen = 0, count = 0;
+ unsigned int target_node; /* NOTE(review): never referenced in this function */
+ CONNECT *cnt;
+ int len = contig_array[node2].length; /* node2's length is subtracted from every contribution */
+
+ for ( ; i < *cntCounter; ++i ) /* on entry *cntCounter is the number of candidates to examine */
+ {
+ cnt = getCntBetween ( tmpNode, cntNodeArr[i] );
+
+ if ( cnt && ( cnt->weight >= 3 || bySmall || cnt->smallIns || cnt->bySmall ) ) /* edge must exist and have weight >= 3, or smallIns/bySmall set, or global bySmall be nonzero */
+ {
+ if ( tmpNode == node1 ) /* the sign of the gapLen difference depends on whether tmpNode is node1 */
+ {
+ gapLen += cnt->gapLen - cntGapArr[i] - len;
+ }
+ else
+ {
+ gapLen += cntGapArr[i] - cnt->gapLen - len;
+ }
+
+ ++count;
+ }
+ }
+
+ *cntCounter = count; /* report how many candidates contributed */
+ return gapLen; /* the sum, not the average; the caller divides by the combined count */
}
/*************************************************
@@ -6357,540 +6937,546 @@ Return:
*************************************************/
static void arrangeNodes_general()
{
- int i, j, gap, adjustedGap;
- CONNECT * ite_cnt, *temp_cnt, *bal_cnt, *prev_cnt, *next_cnt, *dh_cnt, *three_cnt;
- unsigned int node1, node2, tmpNode;
- unsigned int bal_nd1, bal_nd2;
- unsigned int pre_node, bal_pre_node, next_node, bal_next_node; // pre/next node that connected to first/last node if there is
- unsigned int first_node, bal_first_node, last_node, bal_last_node;
- unsigned int affected_node1, bal_affected_node1, affected_node2, bal_affected_node2;
- unsigned int exchangeNode1 = 0, exchangeNode2 = 0;
- int cntCounter, comCount;
- int tmp_dis;
-
- //delete original connections
- for ( i = 1; i <= nodeCounter; i++ )
- {
- node1 = ctg4heapArray[i].ctgID;
- ite_cnt = contig_array[node1].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- ite_cnt->deleted = 1;
- setNextInScaf ( ite_cnt, NULL );
- setPrevInScaf ( ite_cnt, 0 );
- ite_cnt = ite_cnt->next;
- }
-
- bal_nd1 = getTwinCtg ( node1 );
- ite_cnt = contig_array[bal_nd1].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[getTwinCtg ( ite_cnt->contigID )].inSubGraph )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- ite_cnt->deleted = 1;
- setNextInScaf ( ite_cnt, NULL );
- setPrevInScaf ( ite_cnt, 0 );
- ite_cnt = ite_cnt->next;
- }
- }
-
- CONNECT * first_cnt = NULL, *last_cnt = NULL, *tmp_cnt;
- CONNECT * affected_cnt = NULL, *bal_affected_cnt = NULL; //connections connected to pre_cnt and next_cnt
- pre_node = bal_pre_node = next_node = bal_next_node = 0;
- first_node = ctg4heapArray[1].ctgID;
- bal_first_node = getTwinCtg ( first_node );
- ite_cnt = contig_array[bal_first_node].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( ite_cnt->prevInScaf )
- {
- if ( !first_cnt )
- {
- first_cnt = ite_cnt;
- }
-
- bal_pre_node = ite_cnt->contigID;
- pre_node = getTwinCtg ( bal_pre_node );
- tmp_cnt = getCntBetween ( pre_node, first_node );
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- last_node = ctg4heapArray[nodeCounter].ctgID;
- bal_last_node = getTwinCtg ( last_node );
- ite_cnt = contig_array[last_node].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->deleted || ite_cnt->mask )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( ite_cnt->prevInScaf )
- {
- if ( !last_cnt )
- {
- last_cnt = ite_cnt;
- }
-
- next_node = ite_cnt->contigID;
- bal_next_node = getTwinCtg ( next_node );
- tmp_cnt = getCntBetween ( bal_next_node, bal_last_node );
- }
-
- ite_cnt = ite_cnt->next;
- }
-
- prev_cnt = next_cnt = NULL;
-
- for ( i = 1; i < nodeCounter; ++i )
- {
- node1 = ctg4heapArray[i].ctgID;
- node2 = ctg4heapArray[i + 1].ctgID;
- bal_nd1 = getTwinCtg ( node1 );
- bal_nd2 = getTwinCtg ( node2 );
- gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis
- - contig_array[node2].length;
- temp_cnt = getCntBetween ( node1, node2 );
- dh_cnt = getCntBetween ( node2, node1 );
-
- if ( i >= 2 )
- { three_cnt = getCntBetween ( ctg4heapArray[i - 1].ctgID, node2 ); }
-
- if ( dh_cnt )
- {
- tmp_dis = ( int ) contig_array[node1].length + ( int ) contig_array[node2].length + gap + dh_cnt->gapLen;
- }
- else
- {
- tmp_dis = -1;
- }
-
- if ( temp_cnt && ( bySmall || temp_cnt->bySmall || temp_cnt->smallIns || !dh_cnt || !dh_cnt->bySmall || !dh_cnt->smallIns ) )
- {
- temp_cnt->deleted = 0;
- temp_cnt->mask = 0;
- bal_cnt = getCntBetween ( bal_nd2, bal_nd1 );
- bal_cnt->deleted = 0;
- bal_cnt->mask = 0;
- }
- else if ( dh_cnt && ( ( dh_cnt->bySmall || dh_cnt->smallIns || bySmall )
- || ( ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length )
- && tmp_dis > 0 && tmp_dis < 500 && dh_cnt->weight > 3 ) ) )
- {
- dh_cnt->deleted = 0;
- dh_cnt->mask = 0;
- dh_cnt = getCntBetween ( bal_nd1, bal_nd2 );
- dh_cnt->deleted = 0;
- dh_cnt->mask = 0;
- arrayexchange ( i, i + 1 );
-
- if ( i == 1 )
- {
- i = 0;
- continue;
- }
-
- if ( i == 2 )
- {
- prev_cnt->deleted = 1;
- prev_cnt = NULL;
- next_cnt->deleted = 1;
- next_cnt = NULL;
- i = 0;
- continue;
- }
-
- bal_affected_node2 = next_cnt->contigID;
- affected_node2 = getTwinCtg ( bal_affected_node2 );
- bal_affected_cnt = next_cnt->nextInScaf;
- bal_affected_node1 = bal_affected_cnt->contigID;
- affected_node1 = getTwinCtg ( bal_affected_node1 );
- affected_cnt = getCntBetween ( affected_node1, affected_node2 );
-
- if ( !affected_cnt )
- {
- fprintf ( stderr, "affected cnt between %u(%u) and %u(%u) doesn't exists!\n", affected_node1, bal_affected_node1, affected_node2, bal_affected_node2 );
- exit ( 1 );
- }
-
- setNextInScaf ( affected_cnt, NULL );
- setPrevInScaf ( bal_affected_cnt, 0 );
- setNextInScaf ( next_cnt, NULL );
- setPrevInScaf ( prev_cnt, 0 );
- prev_cnt->deleted = 1;
- prev_cnt = NULL;
- next_cnt->deleted = 1;
- next_cnt = NULL;
- i -= 3;
- continue;
- }
- else
- {
- if ( ( bySmall > 0 && gap < 0 )
- || ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length )
- && ( i != nodeCounter - 1 ) )
- {
- adjustedGap = comCount = 0;
- uCntCounter1 = getCntNodes ( getTwinCtg ( node1 ), uCntNodeArr1, uCntGapArr1 );
- dCntCounter1 = getCntNodes ( node1, dCntNodeArr1, dCntGapArr1 );
- uCntCounter2 = getCntNodes ( getTwinCtg ( node2 ), uCntNodeArr2, uCntGapArr2 );
- dCntCounter2 = getCntNodes ( node2, dCntNodeArr2, dCntGapArr2 );
-
- if ( uCntCounter1 < uCntCounter2 )
- {
- tmpNode = getTwinCtg ( node2 );
- cntCounter = uCntCounter1;
- cntNodeArr = &uCntNodeArr1[0];
- cntGapArr = &uCntGapArr1[0];
- }
- else
- {
- tmpNode = getTwinCtg ( node1 );
- cntCounter = uCntCounter2;
- cntNodeArr = &uCntNodeArr2[0];
- cntGapArr = &uCntGapArr2[0];
- }
-
- adjustedGap += calGapLen ( &cntCounter, cntNodeArr, cntGapArr, getTwinCtg ( node2 ), getTwinCtg ( node1 ), tmpNode );
- comCount += cntCounter;
-
- if ( dCntCounter1 < dCntCounter2 )
- {
- tmpNode = node2;
- cntCounter = dCntCounter1;
- cntNodeArr = &dCntNodeArr1[0];
- cntGapArr = &dCntGapArr1[0];
- }
- else
- {
- tmpNode = node1;
- cntCounter = dCntCounter2;
- cntNodeArr = &dCntNodeArr2[0];
- cntGapArr = &dCntGapArr2[0];
- }
-
- adjustedGap += calGapLen ( &cntCounter, cntNodeArr, cntGapArr, node1, node2, tmpNode );
- comCount += cntCounter;
-
- if ( comCount > 0 )
- {
- gap = adjustedGap / comCount;
- }
- }
-
- if ( ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length )
- && ( i != nodeCounter - 1 ) && ( ( exchangeNode1 == 0 && exchangeNode2 == 0 )
- || ( exchangeNode1 != ctg4heapArray[i + 1].ctgID && exchangeNode2 != ctg4heapArray[i].ctgID ) ) )
- {
- exchangeNode1 = ctg4heapArray[i].ctgID;
- exchangeNode2 = ctg4heapArray[i + 1].ctgID;
- arrayexchange ( i, i + 1 );
-
- if ( i == 1 )
- {
- i--;
- continue;
- }
-
- if ( i == 2 )
- {
- prev_cnt->deleted = 1;
- prev_cnt = NULL;
- next_cnt->deleted = 1;
- next_cnt = NULL;
- i = 0;
- continue;
- }
-
- bal_affected_node2 = next_cnt->contigID;
- affected_node2 = getTwinCtg ( bal_affected_node2 );
- bal_affected_cnt = next_cnt->nextInScaf;
- bal_affected_node1 = bal_affected_cnt->contigID;
- affected_node1 = getTwinCtg ( bal_affected_node1 );
- affected_cnt = getCntBetween ( affected_node1, affected_node2 );
-
- if ( !affected_cnt )
- {
- fprintf ( stderr, "affected cnt between %u(%u) and %u(%u) doesn't exists!\n", affected_node1, bal_affected_node1, affected_node2, bal_affected_node2 );
- exit ( 1 );
- }
-
- setNextInScaf ( affected_cnt, NULL );
- setPrevInScaf ( bal_affected_cnt, 0 );
- setNextInScaf ( next_cnt, NULL );
- setPrevInScaf ( prev_cnt, 0 );
- prev_cnt->deleted = 1;
- prev_cnt = NULL;
- next_cnt->deleted = 1;
- next_cnt = NULL;
- i -= 3;
- continue;
- }
-
- temp_cnt = allocateCN ( node2, gap );
-
- if ( cntLookupTable )
- { putCnt2LookupTable ( node1, temp_cnt ); }
-
- temp_cnt->weight = 0;
- temp_cnt->next = contig_array[node1].downwardConnect;
- contig_array[node1].downwardConnect = temp_cnt;
- bal_cnt = allocateCN ( bal_nd1, gap );
-
- if ( cntLookupTable )
- { putCnt2LookupTable ( bal_nd2, bal_cnt ); }
-
- bal_cnt->weight = 0;
- bal_cnt->next = contig_array[bal_nd2].downwardConnect;
- contig_array[bal_nd2].downwardConnect = bal_cnt;
- }
-
- if ( prev_cnt )
- {
- setNextInScaf ( prev_cnt, temp_cnt );
- setPrevInScaf ( temp_cnt, 1 );
- }
-
- if ( next_cnt )
- {
- setNextInScaf ( bal_cnt, next_cnt );
- setPrevInScaf ( next_cnt, 1 );
- }
-
- prev_cnt = temp_cnt;
- next_cnt = bal_cnt;
- }
-
- if ( first_cnt )
- {
- if ( ctg4heapArray[1].ctgID == first_node )
- {
- bal_nd1 = first_cnt->contigID;
- node1 = getTwinCtg ( bal_nd1 );
- node2 = first_node;
- temp_cnt = checkConnect ( node1, node2 );
- bal_cnt = first_cnt;
- next_cnt = checkConnect ( ctg4heapArray[1].ctgID, ctg4heapArray[2].ctgID );
- prev_cnt = checkConnect ( getTwinCtg ( ctg4heapArray[2].ctgID ), getTwinCtg ( ctg4heapArray[1].ctgID ) );
-
- if ( temp_cnt )
- {
- setNextInScaf ( temp_cnt, next_cnt );
- setPrevInScaf ( temp_cnt->nextInScaf, 0 );
- setPrevInScaf ( next_cnt, 1 );
- setNextInScaf ( prev_cnt, bal_cnt );
- }
- }
- else
- {
- bal_pre_node = first_cnt->contigID;
- pre_node = getTwinCtg ( bal_pre_node );
- j = 1;
- node1 = ctg4heapArray[j].ctgID;
- node2 = ctg4heapArray[j + 1].ctgID;
- ite_cnt = getCntBetween ( pre_node, node1 );
- bal_cnt = getCntBetween ( getTwinCtg ( node1 ), bal_pre_node );
-
- while ( !ite_cnt && node2 != first_node )
- {
- tmp_cnt = getCntBetween ( node1, node2 );
- bal_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
- setNextInScaf ( tmp_cnt, NULL );
- setPrevInScaf ( tmp_cnt, 0 );
- tmp_cnt->deleted = 1;
- setNextInScaf ( bal_cnt, NULL );
- setPrevInScaf ( bal_cnt, 0 );
- bal_cnt->deleted = 1;
- ++j;
- node1 = ctg4heapArray[j].ctgID;
- node2 = ctg4heapArray[j + 1].ctgID;
- ite_cnt = getCntBetween ( pre_node, node1 );
- bal_cnt = getCntBetween ( getTwinCtg ( node1 ), bal_pre_node );
- }
-
- if ( !ite_cnt )
- {
- tmp_cnt = getCntBetween ( node1, first_node );
- gap = first_cnt->gapLen - tmp_cnt->gapLen - contig_array[node1].length;
- ite_cnt = allocateCN ( node1, gap );
- ite_cnt->weight = 0;
-
- if ( cntLookupTable )
- {
- putCnt2LookupTable ( pre_node, ite_cnt );
- }
-
- ite_cnt->next = contig_array[pre_node].downwardConnect;
- contig_array[pre_node].downwardConnect = ite_cnt;
- bal_cnt = allocateCN ( bal_pre_node, gap );
- bal_cnt->weight = 0;
-
- if ( cntLookupTable )
- {
- putCnt2LookupTable ( getTwinCtg ( node1 ), bal_cnt );
- }
-
- bal_cnt->next = contig_array[getTwinCtg ( node1 )].downwardConnect;
- contig_array[getTwinCtg ( node1 )].downwardConnect = bal_cnt;
- }
-
- ite_cnt->deleted = 0;
- ite_cnt->mask = 0;
- bal_cnt->deleted = 0;
- bal_cnt->mask = 0;
- tmp_cnt = getCntBetween ( node1, node2 );
- setNextInScaf ( ite_cnt, tmp_cnt );
- setPrevInScaf ( tmp_cnt, 1 );
- tmp_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
- setNextInScaf ( tmp_cnt, bal_cnt );
- setPrevInScaf ( bal_cnt, 1 );
-
- if ( first_cnt->nextInScaf )
- {
- setNextInScaf ( bal_cnt, first_cnt->nextInScaf );
- bal_affected_node1 = first_cnt->nextInScaf->contigID;
- affected_node1 = getTwinCtg ( bal_affected_node1 );
- affected_cnt = getCntBetween ( affected_node1, pre_node );
- setNextInScaf ( affected_cnt, ite_cnt );
- setPrevInScaf ( ite_cnt, 1 );
- }
-
- setNextInScaf ( first_cnt, NULL );
- setPrevInScaf ( first_cnt, 0 );
- first_cnt->deleted = 1;
- first_cnt->mask = 1;
- tmp_cnt = getCntBetween ( pre_node, first_node );
- setNextInScaf ( tmp_cnt, NULL );
- setPrevInScaf ( tmp_cnt, 0 );
- tmp_cnt->deleted = 1;
- tmp_cnt->mask = 1;
- }
- }
-
- if ( last_cnt )
- {
- node1 = ctg4heapArray[nodeCounter].ctgID;
-
- if ( node1 == last_node )
- {
- node2 = last_cnt->contigID;
- bal_nd1 = getTwinCtg ( node1 );
- bal_nd2 = getTwinCtg ( node2 );
- temp_cnt = last_cnt;
- bal_cnt = checkConnect ( bal_nd2, bal_nd1 );
- next_cnt = checkConnect ( getTwinCtg ( ctg4heapArray[nodeCounter].ctgID ),
- getTwinCtg ( ctg4heapArray[nodeCounter - 1].ctgID ) );
- prev_cnt = checkConnect ( ctg4heapArray[nodeCounter - 1].ctgID, ctg4heapArray[nodeCounter].ctgID );
- setNextInScaf ( prev_cnt, temp_cnt );
- setNextInScaf ( bal_cnt, next_cnt );
- setPrevInScaf ( next_cnt, 1 );
- }
- else
- {
- next_node = last_cnt->contigID;
- bal_next_node = getTwinCtg ( next_node );
- j = nodeCounter;
- node1 = ctg4heapArray[j - 1].ctgID;
- node2 = ctg4heapArray[j].ctgID;
- ite_cnt = getCntBetween ( node2, next_node );
- bal_cnt = getCntBetween ( bal_next_node, getTwinCtg ( node2 ) );
-
- while ( !ite_cnt && node1 != last_node )
- {
- tmp_cnt = getCntBetween ( node1, node2 );
- bal_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
- setNextInScaf ( tmp_cnt, NULL );
- setPrevInScaf ( tmp_cnt, 0 );
- tmp_cnt->deleted = 1;
- setNextInScaf ( bal_cnt, NULL );
- setPrevInScaf ( bal_cnt, 0 );
- bal_cnt->deleted = 1;
- --j;
- node1 = ctg4heapArray[j - 1].ctgID;
- node2 = ctg4heapArray[j].ctgID;
- ite_cnt = getCntBetween ( node2, next_node );
- bal_cnt = getCntBetween ( bal_next_node, getTwinCtg ( node2 ) );
- }
-
- if ( !ite_cnt )
- {
- tmp_cnt = getCntBetween ( node1, node2 );
- gap = last_cnt->gapLen - tmp_cnt->gapLen - contig_array[node2].length;
- ite_cnt = allocateCN ( next_node, gap );
- ite_cnt->weight = 0;
-
- if ( cntLookupTable )
- {
- putCnt2LookupTable ( node2, ite_cnt );
- }
-
- ite_cnt->next = contig_array[node2].downwardConnect;
- contig_array[node2].downwardConnect = ite_cnt;
- bal_cnt = allocateCN ( getTwinCtg ( node2 ), gap );
- bal_cnt->weight = 0;
-
- if ( cntLookupTable )
- {
- putCnt2LookupTable ( bal_next_node, bal_cnt );
- }
-
- bal_cnt->next = contig_array[bal_next_node].downwardConnect;
- contig_array[bal_next_node].downwardConnect = bal_cnt;
- }
-
- ite_cnt->deleted = 0;
- ite_cnt->mask = 0;
- bal_cnt->deleted = 0;
- bal_cnt->mask = 0;
- tmp_cnt = getCntBetween ( node1, node2 );
- setNextInScaf ( tmp_cnt, ite_cnt );
- setPrevInScaf ( ite_cnt, 1 );
- tmp_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
- setNextInScaf ( bal_cnt, tmp_cnt );
- setPrevInScaf ( tmp_cnt, 1 );
-
- if ( last_cnt->nextInScaf )
- {
- setNextInScaf ( ite_cnt, last_cnt->nextInScaf );
- affected_node1 = last_cnt->nextInScaf->contigID;
- bal_affected_node1 = getTwinCtg ( affected_node1 );
- bal_affected_cnt = getCntBetween ( bal_affected_node1, bal_next_node );
- setNextInScaf ( bal_affected_cnt, bal_cnt );
- setPrevInScaf ( bal_cnt, 1 );
- }
-
- setNextInScaf ( last_cnt, NULL );
- setPrevInScaf ( last_cnt, 0 );
- last_cnt->deleted = 1;
- tmp_cnt = getCntBetween ( bal_next_node, bal_last_node );
- setNextInScaf ( tmp_cnt, NULL );
- setPrevInScaf ( tmp_cnt, 0 );
- tmp_cnt->deleted = 1;
- }
- }
+ int i, j, gap, adjustedGap; /* body rebuilds the connections between consecutive entries ctg4heapArray[1..nodeCounter]; the stages are commented below */
+ CONNECT *ite_cnt, *temp_cnt, *bal_cnt, *prev_cnt, *next_cnt, *dh_cnt, *three_cnt; /* NOTE(review): three_cnt is assigned once below and never read */
+ unsigned int node1, node2, tmpNode;
+ unsigned int bal_nd1, bal_nd2;
+ unsigned int pre_node, bal_pre_node, next_node, bal_next_node; // pre/next node that connected to first/last node if there is
+ unsigned int first_node, bal_first_node, last_node, bal_last_node;
+ unsigned int affected_node1, bal_affected_node1, affected_node2, bal_affected_node2;
+ unsigned int exchangeNode1 = 0, exchangeNode2 = 0; /* the pair most recently swapped by the overlap rule below; 0/0 means none yet */
+ int cntCounter, comCount;
+ int tmp_dis;
+
+ //delete original connections
+ for ( i = 1; i <= nodeCounter; i++ )
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ ite_cnt = contig_array[node1].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[ite_cnt->contigID].inSubGraph ) /* only live edges whose target is flagged inSubGraph are touched */
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ ite_cnt->deleted = 1;
+ setNextInScaf ( ite_cnt, NULL );
+ setPrevInScaf ( ite_cnt, 0 );
+ ite_cnt = ite_cnt->next;
+ }
+
+ bal_nd1 = getTwinCtg ( node1 );
+ ite_cnt = contig_array[bal_nd1].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->deleted || !contig_array[getTwinCtg ( ite_cnt->contigID )].inSubGraph ) /* twin side: inSubGraph is tested on the twin of the target */
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ ite_cnt->deleted = 1;
+ setNextInScaf ( ite_cnt, NULL );
+ setPrevInScaf ( ite_cnt, 0 );
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ CONNECT *first_cnt = NULL, *last_cnt = NULL, *tmp_cnt;
+ CONNECT *affected_cnt = NULL, *bal_affected_cnt = NULL; //connections connected to pre_cnt and next_cnt
+ pre_node = bal_pre_node = next_node = bal_next_node = 0;
+ first_node = ctg4heapArray[1].ctgID; /* remembered so the code after the main loop can tell whether slot 1 changed */
+ bal_first_node = getTwinCtg ( first_node );
+ ite_cnt = contig_array[bal_first_node].downwardConnect; /* scan the first entry's twin for a surviving edge with prevInScaf set */
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( ite_cnt->prevInScaf )
+ {
+ if ( !first_cnt ) /* only the first match is kept in first_cnt */
+ {
+ first_cnt = ite_cnt;
+ }
+
+ bal_pre_node = ite_cnt->contigID;
+ pre_node = getTwinCtg ( bal_pre_node );
+ tmp_cnt = getCntBetween ( pre_node, first_node ); /* NOTE(review): this result is overwritten before it is ever read */
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ last_node = ctg4heapArray[nodeCounter].ctgID; /* same bookkeeping for the last slot */
+ bal_last_node = getTwinCtg ( last_node );
+ ite_cnt = contig_array[last_node].downwardConnect; /* scan the last entry's own list for a surviving edge with prevInScaf set */
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->deleted || ite_cnt->mask )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( ite_cnt->prevInScaf )
+ {
+ if ( !last_cnt ) /* only the first match is kept in last_cnt */
+ {
+ last_cnt = ite_cnt;
+ }
+
+ next_node = ite_cnt->contigID;
+ bal_next_node = getTwinCtg ( next_node );
+ tmp_cnt = getCntBetween ( bal_next_node, bal_last_node ); /* NOTE(review): this result is overwritten before it is ever read */
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+
+ prev_cnt = next_cnt = NULL;
+
+ for ( i = 1; i < nodeCounter; ++i ) /* main pass over adjacent pairs ( i, i + 1 ); i is rewound below whenever two entries are swapped */
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ node2 = ctg4heapArray[i + 1].ctgID;
+ bal_nd1 = getTwinCtg ( node1 );
+ bal_nd2 = getTwinCtg ( node2 );
+ gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis /* gap implied by the two dis values; negative means the entries overlap */
+ - contig_array[node2].length;
+ temp_cnt = getCntBetween ( node1, node2 ); /* edge in array order, if any */
+ dh_cnt = getCntBetween ( node2, node1 ); /* edge in the opposite order, if any */
+
+ if ( i >= 2 )
+ {
+ three_cnt = getCntBetween ( ctg4heapArray[i - 1].ctgID, node2 ); /* NOTE(review): result is never used */
+ }
+
+ if ( dh_cnt )
+ {
+ tmp_dis = ( int ) contig_array[node1].length + ( int ) contig_array[node2].length + gap + dh_cnt->gapLen;
+ }
+ else
+ {
+ tmp_dis = -1;
+ }
+
+ if ( temp_cnt && ( bySmall || temp_cnt->bySmall || temp_cnt->smallIns || !dh_cnt || !dh_cnt->bySmall || !dh_cnt->smallIns ) ) /* NOTE(review): the last two terms are jointly false only when dh_cnt has both bySmall and smallIns set -- confirm || rather than && is intended */
+ {
+ temp_cnt->deleted = 0; /* case 1: revive the existing node1 -> node2 edge */
+ temp_cnt->mask = 0;
+ bal_cnt = getCntBetween ( bal_nd2, bal_nd1 ); /* NOTE(review): dereferenced below without a NULL check */
+ bal_cnt->deleted = 0;
+ bal_cnt->mask = 0;
+ }
+ else if ( dh_cnt && ( ( dh_cnt->bySmall || dh_cnt->smallIns || bySmall ) /* case 2: a reverse edge is trusted, so the two entries are swapped */
+ || ( ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length )
+ && tmp_dis > 0 && tmp_dis < 500 && dh_cnt->weight > 3 ) ) )
+ {
+ dh_cnt->deleted = 0;
+ dh_cnt->mask = 0;
+ dh_cnt = getCntBetween ( bal_nd1, bal_nd2 ); /* NOTE(review): dereferenced below without a NULL check */
+ dh_cnt->deleted = 0;
+ dh_cnt->mask = 0;
+ arrayexchange ( i, i + 1 );
+
+ if ( i == 1 )
+ {
+ i = 0; /* the loop's ++i restarts at pair ( 1, 2 ) */
+ continue;
+ }
+
+ if ( i == 2 )
+ {
+ prev_cnt->deleted = 1; /* discard the links built for pair ( 1, 2 ) and start over */
+ prev_cnt = NULL;
+ next_cnt->deleted = 1;
+ next_cnt = NULL;
+ i = 0;
+ continue;
+ }
+
+ bal_affected_node2 = next_cnt->contigID; /* i >= 3: locate the edge one step further back that was chained to the previous pair */
+ affected_node2 = getTwinCtg ( bal_affected_node2 );
+ bal_affected_cnt = next_cnt->nextInScaf;
+ bal_affected_node1 = bal_affected_cnt->contigID;
+ affected_node1 = getTwinCtg ( bal_affected_node1 );
+ affected_cnt = getCntBetween ( affected_node1, affected_node2 );
+
+ if ( !affected_cnt )
+ {
+ fprintf ( stderr, "affected cnt between %u(%u) and %u(%u) doesn't exists!\n", affected_node1, bal_affected_node1, affected_node2, bal_affected_node2 );
+ exit ( 1 );
+ }
+
+ setNextInScaf ( affected_cnt, NULL ); /* unhook the previous pair's links in both directions */
+ setPrevInScaf ( bal_affected_cnt, 0 );
+ setNextInScaf ( next_cnt, NULL );
+ setPrevInScaf ( prev_cnt, 0 );
+ prev_cnt->deleted = 1;
+ prev_cnt = NULL;
+ next_cnt->deleted = 1;
+ next_cnt = NULL;
+ i -= 3; /* with the loop's ++i this resumes at pair ( i - 2, i - 1 ) */
+ continue;
+ }
+ else
+ {
+ if ( ( bySmall > 0 && gap < 0 ) /* parses as A || ( ( B || C ) && D ): the i != nodeCounter - 1 test guards only the two overlap terms */
+ || ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length )
+ && ( i != nodeCounter - 1 ) )
+ {
+ adjustedGap = comCount = 0;
+ uCntCounter1 = getCntNodes ( getTwinCtg ( node1 ), uCntNodeArr1, uCntGapArr1 ); /* neighbours of both nodes, on the twin side (u*) and the forward side (d*) */
+ dCntCounter1 = getCntNodes ( node1, dCntNodeArr1, dCntGapArr1 );
+ uCntCounter2 = getCntNodes ( getTwinCtg ( node2 ), uCntNodeArr2, uCntGapArr2 );
+ dCntCounter2 = getCntNodes ( node2, dCntNodeArr2, dCntGapArr2 );
+
+ if ( uCntCounter1 < uCntCounter2 ) /* iterate the shorter neighbour list and look each entry up from the other node */
+ {
+ tmpNode = getTwinCtg ( node2 );
+ cntCounter = uCntCounter1;
+ cntNodeArr = &uCntNodeArr1[0];
+ cntGapArr = &uCntGapArr1[0];
+ }
+ else
+ {
+ tmpNode = getTwinCtg ( node1 );
+ cntCounter = uCntCounter2;
+ cntNodeArr = &uCntNodeArr2[0];
+ cntGapArr = &uCntGapArr2[0];
+ }
+
+ adjustedGap += calGapLen ( &cntCounter, cntNodeArr, cntGapArr, getTwinCtg ( node2 ), getTwinCtg ( node1 ), tmpNode );
+ comCount += cntCounter; /* calGapLen rewrote cntCounter to the number of neighbours that contributed */
+
+ if ( dCntCounter1 < dCntCounter2 ) /* same selection for the forward-side lists */
+ {
+ tmpNode = node2;
+ cntCounter = dCntCounter1;
+ cntNodeArr = &dCntNodeArr1[0];
+ cntGapArr = &dCntGapArr1[0];
+ }
+ else
+ {
+ tmpNode = node1;
+ cntCounter = dCntCounter2;
+ cntNodeArr = &dCntNodeArr2[0];
+ cntGapArr = &dCntGapArr2[0];
+ }
+
+ adjustedGap += calGapLen ( &cntCounter, cntNodeArr, cntGapArr, node1, node2, tmpNode );
+ comCount += cntCounter;
+
+ if ( comCount > 0 )
+ {
+ gap = adjustedGap / comCount; /* integer average over all contributing neighbours replaces the dis-based gap */
+ }
+ }
+
+ if ( ( -gap > ( int ) contig_array[node1].length || -gap > ( int ) contig_array[node2].length ) /* overlap still exceeds one contig's length: swap, unless this is the last pair or the pair recorded in exchangeNode1/2 (presumably to stop it swapping straight back) */
+ && ( i != nodeCounter - 1 ) && ( ( exchangeNode1 == 0 && exchangeNode2 == 0 )
+ || ( exchangeNode1 != ctg4heapArray[i + 1].ctgID && exchangeNode2 != ctg4heapArray[i].ctgID ) ) )
+ {
+ exchangeNode1 = ctg4heapArray[i].ctgID;
+ exchangeNode2 = ctg4heapArray[i + 1].ctgID;
+ arrayexchange ( i, i + 1 );
+
+ if ( i == 1 )
+ {
+ i--; /* the loop's ++i restarts at pair ( 1, 2 ) */
+ continue;
+ }
+
+ if ( i == 2 )
+ {
+ prev_cnt->deleted = 1; /* discard the links built for pair ( 1, 2 ) and start over */
+ prev_cnt = NULL;
+ next_cnt->deleted = 1;
+ next_cnt = NULL;
+ i = 0;
+ continue;
+ }
+
+ bal_affected_node2 = next_cnt->contigID; /* same unwinding as in the reverse-edge case above */
+ affected_node2 = getTwinCtg ( bal_affected_node2 );
+ bal_affected_cnt = next_cnt->nextInScaf;
+ bal_affected_node1 = bal_affected_cnt->contigID;
+ affected_node1 = getTwinCtg ( bal_affected_node1 );
+ affected_cnt = getCntBetween ( affected_node1, affected_node2 );
+
+ if ( !affected_cnt )
+ {
+ fprintf ( stderr, "affected cnt between %u(%u) and %u(%u) doesn't exists!\n", affected_node1, bal_affected_node1, affected_node2, bal_affected_node2 );
+ exit ( 1 );
+ }
+
+ setNextInScaf ( affected_cnt, NULL );
+ setPrevInScaf ( bal_affected_cnt, 0 );
+ setNextInScaf ( next_cnt, NULL );
+ setPrevInScaf ( prev_cnt, 0 );
+ prev_cnt->deleted = 1;
+ prev_cnt = NULL;
+ next_cnt->deleted = 1;
+ next_cnt = NULL;
+ i -= 3; /* with the loop's ++i this resumes at pair ( i - 2, i - 1 ) */
+ continue;
+ }
+
+ temp_cnt = allocateCN ( node2, gap ); /* case 3: no usable edge, so a new weight-0 edge is created for the pair */
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( node1, temp_cnt );
+ }
+
+ temp_cnt->weight = 0;
+ temp_cnt->next = contig_array[node1].downwardConnect; /* pushed at the head of node1's list */
+ contig_array[node1].downwardConnect = temp_cnt;
+ bal_cnt = allocateCN ( bal_nd1, gap ); /* matching edge on the twin side, bal_nd2 -> bal_nd1 */
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( bal_nd2, bal_cnt );
+ }
+
+ bal_cnt->weight = 0;
+ bal_cnt->next = contig_array[bal_nd2].downwardConnect;
+ contig_array[bal_nd2].downwardConnect = bal_cnt;
+ }
+
+ if ( prev_cnt ) /* chain this pair's edge after the previous pair's edge */
+ {
+ setNextInScaf ( prev_cnt, temp_cnt );
+ setPrevInScaf ( temp_cnt, 1 );
+ }
+
+ if ( next_cnt ) /* and the twin-side edges in the opposite direction */
+ {
+ setNextInScaf ( bal_cnt, next_cnt );
+ setPrevInScaf ( next_cnt, 1 );
+ }
+
+ prev_cnt = temp_cnt;
+ next_cnt = bal_cnt;
+ }
+
+ if ( first_cnt ) /* reattach the edge found before the main pass at the front of the array */
+ {
+ if ( ctg4heapArray[1].ctgID == first_node ) /* slot 1 still holds the original first entry */
+ {
+ bal_nd1 = first_cnt->contigID;
+ node1 = getTwinCtg ( bal_nd1 );
+ node2 = first_node;
+ temp_cnt = checkConnect ( node1, node2 );
+ bal_cnt = first_cnt;
+ next_cnt = checkConnect ( ctg4heapArray[1].ctgID, ctg4heapArray[2].ctgID ); /* reads slot 2; assumes nodeCounter >= 2 -- TODO confirm */
+ prev_cnt = checkConnect ( getTwinCtg ( ctg4heapArray[2].ctgID ), getTwinCtg ( ctg4heapArray[1].ctgID ) );
+
+ if ( temp_cnt )
+ {
+ setNextInScaf ( temp_cnt, next_cnt );
+ setPrevInScaf ( temp_cnt->nextInScaf, 0 );
+ setPrevInScaf ( next_cnt, 1 );
+ setNextInScaf ( prev_cnt, bal_cnt );
+ }
+ }
+ else /* slot 1 changed: find or create an edge from pre_node to an entry at the front */
+ {
+ bal_pre_node = first_cnt->contigID;
+ pre_node = getTwinCtg ( bal_pre_node );
+ j = 1;
+ node1 = ctg4heapArray[j].ctgID;
+ node2 = ctg4heapArray[j + 1].ctgID;
+ ite_cnt = getCntBetween ( pre_node, node1 );
+ bal_cnt = getCntBetween ( getTwinCtg ( node1 ), bal_pre_node );
+
+ while ( !ite_cnt && node2 != first_node ) /* advance j, deleting the pair edges passed over, until pre_node has an edge to entry j or entry j + 1 is first_node */
+ {
+ tmp_cnt = getCntBetween ( node1, node2 );
+ bal_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
+ setNextInScaf ( tmp_cnt, NULL );
+ setPrevInScaf ( tmp_cnt, 0 );
+ tmp_cnt->deleted = 1;
+ setNextInScaf ( bal_cnt, NULL );
+ setPrevInScaf ( bal_cnt, 0 );
+ bal_cnt->deleted = 1;
+ ++j;
+ node1 = ctg4heapArray[j].ctgID;
+ node2 = ctg4heapArray[j + 1].ctgID;
+ ite_cnt = getCntBetween ( pre_node, node1 );
+ bal_cnt = getCntBetween ( getTwinCtg ( node1 ), bal_pre_node );
+ }
+
+ if ( !ite_cnt ) /* no existing edge: create pre_node -> node1 and its twin-side counterpart with weight 0 */
+ {
+ tmp_cnt = getCntBetween ( node1, first_node ); /* NOTE(review): dereferenced on the next line without a NULL check */
+ gap = first_cnt->gapLen - tmp_cnt->gapLen - contig_array[node1].length;
+ ite_cnt = allocateCN ( node1, gap );
+ ite_cnt->weight = 0;
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( pre_node, ite_cnt );
+ }
+
+ ite_cnt->next = contig_array[pre_node].downwardConnect;
+ contig_array[pre_node].downwardConnect = ite_cnt;
+ bal_cnt = allocateCN ( bal_pre_node, gap );
+ bal_cnt->weight = 0;
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( getTwinCtg ( node1 ), bal_cnt );
+ }
+
+ bal_cnt->next = contig_array[getTwinCtg ( node1 )].downwardConnect;
+ contig_array[getTwinCtg ( node1 )].downwardConnect = bal_cnt;
+ }
+
+ ite_cnt->deleted = 0;
+ ite_cnt->mask = 0;
+ bal_cnt->deleted = 0;
+ bal_cnt->mask = 0;
+ tmp_cnt = getCntBetween ( node1, node2 ); /* chain the new front edge to the pair edge that follows it */
+ setNextInScaf ( ite_cnt, tmp_cnt );
+ setPrevInScaf ( tmp_cnt, 1 );
+ tmp_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
+ setNextInScaf ( tmp_cnt, bal_cnt );
+ setPrevInScaf ( bal_cnt, 1 );
+
+ if ( first_cnt->nextInScaf ) /* carry over whatever first_cnt was chained to */
+ {
+ setNextInScaf ( bal_cnt, first_cnt->nextInScaf );
+ bal_affected_node1 = first_cnt->nextInScaf->contigID;
+ affected_node1 = getTwinCtg ( bal_affected_node1 );
+ affected_cnt = getCntBetween ( affected_node1, pre_node );
+ setNextInScaf ( affected_cnt, ite_cnt );
+ setPrevInScaf ( ite_cnt, 1 );
+ }
+
+ setNextInScaf ( first_cnt, NULL ); /* retire the old edge pair: unlinked, deleted and masked */
+ setPrevInScaf ( first_cnt, 0 );
+ first_cnt->deleted = 1;
+ first_cnt->mask = 1;
+ tmp_cnt = getCntBetween ( pre_node, first_node );
+ setNextInScaf ( tmp_cnt, NULL );
+ setPrevInScaf ( tmp_cnt, 0 );
+ tmp_cnt->deleted = 1;
+ tmp_cnt->mask = 1;
+ }
+ }
+
+ if ( last_cnt ) /* mirror of the first_cnt handling, for the end of the array */
+ {
+ node1 = ctg4heapArray[nodeCounter].ctgID;
+
+ if ( node1 == last_node ) /* last slot still holds the original last entry */
+ {
+ node2 = last_cnt->contigID;
+ bal_nd1 = getTwinCtg ( node1 );
+ bal_nd2 = getTwinCtg ( node2 );
+ temp_cnt = last_cnt;
+ bal_cnt = checkConnect ( bal_nd2, bal_nd1 );
+ next_cnt = checkConnect ( getTwinCtg ( ctg4heapArray[nodeCounter].ctgID ),
+ getTwinCtg ( ctg4heapArray[nodeCounter - 1].ctgID ) );
+ prev_cnt = checkConnect ( ctg4heapArray[nodeCounter - 1].ctgID, ctg4heapArray[nodeCounter].ctgID );
+ setNextInScaf ( prev_cnt, temp_cnt );
+ setNextInScaf ( bal_cnt, next_cnt );
+ setPrevInScaf ( next_cnt, 1 );
+ }
+ else /* last slot changed: find or create an edge from an entry at the back to next_node */
+ {
+ next_node = last_cnt->contigID;
+ bal_next_node = getTwinCtg ( next_node );
+ j = nodeCounter;
+ node1 = ctg4heapArray[j - 1].ctgID;
+ node2 = ctg4heapArray[j].ctgID;
+ ite_cnt = getCntBetween ( node2, next_node );
+ bal_cnt = getCntBetween ( bal_next_node, getTwinCtg ( node2 ) );
+
+ while ( !ite_cnt && node1 != last_node ) /* step j backwards, deleting the pair edges passed over, until entry j has an edge to next_node or entry j - 1 is last_node */
+ {
+ tmp_cnt = getCntBetween ( node1, node2 );
+ bal_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
+ setNextInScaf ( tmp_cnt, NULL );
+ setPrevInScaf ( tmp_cnt, 0 );
+ tmp_cnt->deleted = 1;
+ setNextInScaf ( bal_cnt, NULL );
+ setPrevInScaf ( bal_cnt, 0 );
+ bal_cnt->deleted = 1;
+ --j;
+ node1 = ctg4heapArray[j - 1].ctgID;
+ node2 = ctg4heapArray[j].ctgID;
+ ite_cnt = getCntBetween ( node2, next_node );
+ bal_cnt = getCntBetween ( bal_next_node, getTwinCtg ( node2 ) );
+ }
+
+ if ( !ite_cnt ) /* no existing edge: create node2 -> next_node and its twin-side counterpart with weight 0 */
+ {
+ tmp_cnt = getCntBetween ( node1, node2 ); /* NOTE(review): dereferenced on the next line without a NULL check */
+ gap = last_cnt->gapLen - tmp_cnt->gapLen - contig_array[node2].length;
+ ite_cnt = allocateCN ( next_node, gap );
+ ite_cnt->weight = 0;
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( node2, ite_cnt );
+ }
+
+ ite_cnt->next = contig_array[node2].downwardConnect;
+ contig_array[node2].downwardConnect = ite_cnt;
+ bal_cnt = allocateCN ( getTwinCtg ( node2 ), gap );
+ bal_cnt->weight = 0;
+
+ if ( cntLookupTable )
+ {
+ putCnt2LookupTable ( bal_next_node, bal_cnt );
+ }
+
+ bal_cnt->next = contig_array[bal_next_node].downwardConnect;
+ contig_array[bal_next_node].downwardConnect = bal_cnt;
+ }
+
+ ite_cnt->deleted = 0;
+ ite_cnt->mask = 0;
+ bal_cnt->deleted = 0;
+ bal_cnt->mask = 0;
+ tmp_cnt = getCntBetween ( node1, node2 ); /* chain the preceding pair edge to the new back edge */
+ setNextInScaf ( tmp_cnt, ite_cnt );
+ setPrevInScaf ( ite_cnt, 1 );
+ tmp_cnt = getCntBetween ( getTwinCtg ( node2 ), getTwinCtg ( node1 ) );
+ setNextInScaf ( bal_cnt, tmp_cnt );
+ setPrevInScaf ( tmp_cnt, 1 );
+
+ if ( last_cnt->nextInScaf ) /* carry over whatever last_cnt was chained to */
+ {
+ setNextInScaf ( ite_cnt, last_cnt->nextInScaf );
+ affected_node1 = last_cnt->nextInScaf->contigID;
+ bal_affected_node1 = getTwinCtg ( affected_node1 );
+ bal_affected_cnt = getCntBetween ( bal_affected_node1, bal_next_node );
+ setNextInScaf ( bal_affected_cnt, bal_cnt );
+ setPrevInScaf ( bal_cnt, 1 );
+ }
+
+ setNextInScaf ( last_cnt, NULL ); /* retire the old edge pair */
+ setPrevInScaf ( last_cnt, 0 );
+ last_cnt->deleted = 1; /* NOTE(review): unlike the first_cnt branch, mask is not set here -- confirm the asymmetry is intended */
+ tmp_cnt = getCntBetween ( bal_next_node, bal_last_node );
+ setNextInScaf ( tmp_cnt, NULL );
+ setPrevInScaf ( tmp_cnt, 0 );
+ tmp_cnt->deleted = 1;
+ }
+ }
}
/*************************************************
@@ -6907,54 +7493,58 @@ Return:
*************************************************/
boolean checkOverlapInBetween_general ( double tolerance )
{
- int i, gap;
- unsigned int node1, node2;
- int lenSum, lenOlp;
- CONNECT * cnt;
- lenSum = lenOlp = 0;
-
- for ( i = 1; i <= nodeCounter; i++ )
- {
- node1 = ctg4heapArray[i].ctgID;
- lenSum += contig_array[node1].length;
- }
-
- if ( lenSum < 1 )
- { return 0; }
-
- for ( i = 1; i < nodeCounter; i++ )
- {
- node2 = ctg4heapArray[i + 1].ctgID;
- gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis
- - contig_array[node2].length;
-
- if ( -gap > 0 )
- {
- node1 = ctg4heapArray[i].ctgID;
- cnt = getCntBetween ( node1, node2 );
-
- if ( cnt && cnt->gapLen > gap )
- {
- continue;
- }
- else if ( ( cnt = getCntBetween ( node2, node1 ) ) != NULL
- && cnt->gapLen > gap )
- {
- continue;
- }
- else if ( -gap < overlaplen )
- {
- continue;
- }
-
- lenOlp += -gap;
- }
-
- if ( ( double ) lenOlp / lenSum > tolerance )
- { return 0; }
- }
-
- return 1;
+ int i, gap;
+ unsigned int node1, node2;
+ int lenSum, lenOlp;
+ CONNECT *cnt;
+ lenSum = lenOlp = 0;
+
+ for ( i = 1; i <= nodeCounter; i++ )
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ lenSum += contig_array[node1].length;
+ }
+
+ if ( lenSum < 1 )
+ {
+ return 0;
+ }
+
+ for ( i = 1; i < nodeCounter; i++ )
+ {
+ node2 = ctg4heapArray[i + 1].ctgID;
+ gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis
+ - contig_array[node2].length;
+
+ if ( -gap > 0 )
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ cnt = getCntBetween ( node1, node2 );
+
+ if ( cnt && cnt->gapLen > gap )
+ {
+ continue;
+ }
+ else if ( ( cnt = getCntBetween ( node2, node1 ) ) != NULL
+ && cnt->gapLen > gap )
+ {
+ continue;
+ }
+ else if ( -gap < overlaplen )
+ {
+ continue;
+ }
+
+ lenOlp += -gap;
+ }
+
+ if ( ( double ) lenOlp / lenSum > tolerance )
+ {
+ return 0;
+ }
+ }
+
+ return 1;
}
int canexchange = 0, exchange_num = 0, failexchange = 0;
@@ -6974,41 +7564,47 @@ Return:
*************************************************/
static boolean checkConflictCnt_general ( double tolerance )
{
- int i, j, gap;
- int supportCounter = 0;
- int objectCounter = 0;
- CONNECT * cnt;
-
- for ( i = 1; i < nodeCounter; i++ )
- {
- for ( j = i + 1; j <= nodeCounter; j++ )
- {
- cnt = checkConnect ( ctg4heapArray[i].ctgID, ctg4heapArray[j].ctgID );
-
- if ( cnt )
- { supportCounter += cnt->weight; }
-
- cnt = checkConnect ( ctg4heapArray[j].ctgID, ctg4heapArray[i].ctgID );
-
- if ( cnt )
- {
- gap = ctg4heapArray[j].dis - ctg4heapArray[i].dis - contig_array[ctg4heapArray[j].ctgID].length;
-
- if ( gap > -overlaplen && gap >= cnt->gapLen && cnt->inherit == 0 )
- {
- objectCounter += cnt->weight;
- }
- }
- }
- }
-
- if ( supportCounter < 1 )
- { return 1; }
-
- if ( ( double ) objectCounter / supportCounter < tolerance )
- { return 0; }
-
- return 1;
+ int i, j, gap;
+ int supportCounter = 0;
+ int objectCounter = 0;
+ CONNECT *cnt;
+
+ for ( i = 1; i < nodeCounter; i++ )
+ {
+ for ( j = i + 1; j <= nodeCounter; j++ )
+ {
+ cnt = checkConnect ( ctg4heapArray[i].ctgID, ctg4heapArray[j].ctgID );
+
+ if ( cnt )
+ {
+ supportCounter += cnt->weight;
+ }
+
+ cnt = checkConnect ( ctg4heapArray[j].ctgID, ctg4heapArray[i].ctgID );
+
+ if ( cnt )
+ {
+ gap = ctg4heapArray[j].dis - ctg4heapArray[i].dis - contig_array[ctg4heapArray[j].ctgID].length;
+
+ if ( gap > -overlaplen && gap >= cnt->gapLen && cnt->inherit == 0 )
+ {
+ objectCounter += cnt->weight;
+ }
+ }
+ }
+ }
+
+ if ( supportCounter < 1 )
+ {
+ return 1;
+ }
+
+ if ( ( double ) objectCounter / supportCounter < tolerance )
+ {
+ return 0;
+ }
+
+ return 1;
}
/*************************************************
@@ -7027,15 +7623,17 @@ Return:
*************************************************/
static int getIndexInSortedSubgraph ( unsigned int node, int count )
{
- int index;
+ int index;
- for ( index = 0; index < count; ++index )
- {
- if ( nodesInSubInOrder[index] == node )
- { return index; }
- }
+ for ( index = 0; index < count; ++index )
+ {
+ if ( nodesInSubInOrder[index] == node )
+ {
+ return index;
+ }
+ }
- return -1;
+ return -1;
}
/*************************************************
@@ -7051,108 +7649,110 @@ Return:
Pointer to arc if existed.
NULL otherwise.
*************************************************/
-static preARC * getValidArc ( unsigned int node )
+static preARC *getValidArc ( unsigned int node )
{
- int num = 0;
- preARC * arc = contig_array[node].arcs;
+ int num = 0;
+ preARC *arc = contig_array[node].arcs;
- while ( arc )
- {
- if ( arc->multiplicity > 1 )
- {
- ++num;
+ while ( arc )
+ {
+ if ( arc->multiplicity > 1 )
+ {
+ ++num;
- if ( num > 1 )
- {
- return NULL;
- }
- }
+ if ( num > 1 )
+ {
+ return NULL;
+ }
+ }
- arc = arc->next;
- }
+ arc = arc->next;
+ }
- return arc;
+ return arc;
}
static boolean clearUpSubgraph()
{
- unsigned int i, ctg1, bal_ctg1, ctg2, bal_ctg2;
- int j, arc_num, num5, num3, index = 0, count = 0;
- preARC * arc;
- CONNECT * cnt;
-
- //put all contigs in "nodesInSub" array
- for ( i = 1; i <= nodeCounter; ++i )
- {
- nodesInSub[i - 1] = ctg4heapArray[i].ctgID;
- }
-
- for ( i = 0; i < nodeCounter; ++i )
- {
- ctg1 = nodesInSub[i];
- index = getIndexInSortedSubgraph ( ctg1, count );
-
- if ( index >= 0 && index < count - 1 ) //this contig is already in array
- { continue; }
-
- bal_ctg1 = getTwinCtg ( ctg1 );
- num5 = 0;
- num3 = 0;
- arc_num = 0;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg1;
- arc = getValidArc ( ctg1 );
-
- while ( arc )
- {
- ctg2 = arc->to_ed;
- bal_ctg2 = getTwinCtg ( ctg2 );
-
- if ( ( arc = getValidArc ( bal_ctg2 ) ) == NULL )
- {
- break;
- }
- else if ( arc->to_ed != bal_ctg1 )
- {
- break;
- }
-
- ctg1 = ctg2;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg1;
- arc = getValidArc ( ctg1 );
- }
-
- ctg1 = nodesInSub[i];
- arc = getValidArc ( bal_ctg1 );
-
- while ( arc )
- {
- bal_ctg2 = arc->to_ed;
- ctg2 = getTwinCtg ( bal_ctg2 );
-
- if ( ( arc = getValidArc ( ctg2 ) ) == NULL )
- {
- break;
- }
- else if ( arc->to_ed != ctg1 )
- {
- break;
- }
-
- ctg1 = ctg2;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = ctg1;
- arc = getValidArc ( bal_ctg2 );
- }
-
- for ( j = num3 - 1; j >= 0; --j )
- {
- nodesInSubInOrder[index++] = * ( unsigned int * ) darrayGet ( scaf3, j );
- }
-
- for ( j = 0; j < num5; ++j )
- {
- nodesInSubInOrder[index++] = * ( unsigned int * ) darrayGet ( scaf5, j );
- }
- }
+ unsigned int i, ctg1, bal_ctg1, ctg2, bal_ctg2;
+ int j, arc_num, num5, num3, index = 0, count = 0;
+ preARC *arc;
+ CONNECT *cnt;
+
+ //put all contigs in "nodesInSub" array
+ for ( i = 1; i <= nodeCounter; ++i )
+ {
+ nodesInSub[i - 1] = ctg4heapArray[i].ctgID;
+ }
+
+ for ( i = 0; i < nodeCounter; ++i )
+ {
+ ctg1 = nodesInSub[i];
+ index = getIndexInSortedSubgraph ( ctg1, count );
+
+ if ( index >= 0 && index < count - 1 ) //this contig is already in array
+ {
+ continue;
+ }
+
+ bal_ctg1 = getTwinCtg ( ctg1 );
+ num5 = 0;
+ num3 = 0;
+ arc_num = 0;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg1;
+ arc = getValidArc ( ctg1 );
+
+ while ( arc )
+ {
+ ctg2 = arc->to_ed;
+ bal_ctg2 = getTwinCtg ( ctg2 );
+
+ if ( ( arc = getValidArc ( bal_ctg2 ) ) == NULL )
+ {
+ break;
+ }
+ else if ( arc->to_ed != bal_ctg1 )
+ {
+ break;
+ }
+
+ ctg1 = ctg2;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg1;
+ arc = getValidArc ( ctg1 );
+ }
+
+ ctg1 = nodesInSub[i];
+ arc = getValidArc ( bal_ctg1 );
+
+ while ( arc )
+ {
+ bal_ctg2 = arc->to_ed;
+ ctg2 = getTwinCtg ( bal_ctg2 );
+
+ if ( ( arc = getValidArc ( ctg2 ) ) == NULL )
+ {
+ break;
+ }
+ else if ( arc->to_ed != ctg1 )
+ {
+ break;
+ }
+
+ ctg1 = ctg2;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = ctg1;
+ arc = getValidArc ( bal_ctg2 );
+ }
+
+ for ( j = num3 - 1; j >= 0; --j )
+ {
+ nodesInSubInOrder[index++] = * ( unsigned int * ) darrayGet ( scaf3, j );
+ }
+
+ for ( j = 0; j < num5; ++j )
+ {
+ nodesInSubInOrder[index++] = * ( unsigned int * ) darrayGet ( scaf5, j );
+ }
+ }
}
/*************************************************
@@ -7171,72 +7771,72 @@ Return:
*************************************************/
static void transferCnt2RemainNode ( unsigned int maskNode, unsigned int remainNode )
{
- CONNECT * cnt = contig_array[maskNode].downwardConnect;
- CONNECT * bal_cnt, *nextCnt, *bal_nextCnt, *tmpCnt, *bal_tmpCnt;
- unsigned int nextNode1, bal_nextNode1, nextNode2, bal_nextNode2;
- unsigned int bal_maskNode = getTwinCtg ( maskNode ), bal_remainNode = getTwinCtg ( remainNode );
- int gap, weight, inherit;
-
- while ( cnt )
- {
- if ( cnt->mask )
- {
- cnt = cnt->next;
- continue;
- }
-
- nextNode1 = cnt->contigID;
- bal_nextNode1 = getTwinCtg ( nextNode1 );
- bal_cnt = getCntBetween ( bal_nextNode1, bal_maskNode );
- gap = cnt->gapLen;
- weight = cnt->weight;
- tmpCnt = getCntBetween ( remainNode, nextNode1 );
-
- if ( tmpCnt )
- {
- inherit = 0;
- }
- else
- {
- inherit = 1;
- }
-
- if ( cnt->nextInScaf )
- {
- nextNode2 = cnt->nextInScaf->contigID;
- bal_nextNode2 = getTwinCtg ( nextNode2 );
- nextCnt = getCntBetween ( nextNode1, nextNode2 );
- bal_nextCnt = getCntBetween ( bal_nextNode2, bal_nextNode1 );
-
- if ( nextNode1 != remainNode && nextNode2 != remainNode )
- {
- tmpCnt = add1Connect ( remainNode, nextNode1, gap, weight, inherit );
- bal_tmpCnt = add1Connect ( bal_nextNode1, bal_remainNode, gap, weight, inherit );
- tmpCnt->nextInScaf = nextCnt;
- tmpCnt->mask = 0;
- tmpCnt->deleted = 0;
- bal_nextCnt->nextInScaf = bal_tmpCnt;
- bal_tmpCnt->prevInScaf = 1;
- bal_tmpCnt->mask = 0;
- bal_tmpCnt->deleted = 0;
- }
- else
- {
- nextCnt->prevInScaf = 0;
- bal_nextCnt->nextInScaf = NULL;
- }
- }
-
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- cnt->mask = 1;
- cnt->deleted = 1;
- bal_cnt->nextInScaf = NULL;
- bal_cnt->prevInScaf = 0;
- bal_cnt->mask = 1;
- bal_cnt->deleted = 1;
- cnt = cnt->next;
- }
+ CONNECT *cnt = contig_array[maskNode].downwardConnect;
+ CONNECT *bal_cnt, *nextCnt, *bal_nextCnt, *tmpCnt, *bal_tmpCnt;
+ unsigned int nextNode1, bal_nextNode1, nextNode2, bal_nextNode2;
+ unsigned int bal_maskNode = getTwinCtg ( maskNode ), bal_remainNode = getTwinCtg ( remainNode );
+ int gap, weight, inherit;
+
+ while ( cnt )
+ {
+ if ( cnt->mask )
+ {
+ cnt = cnt->next;
+ continue;
+ }
+
+ nextNode1 = cnt->contigID;
+ bal_nextNode1 = getTwinCtg ( nextNode1 );
+ bal_cnt = getCntBetween ( bal_nextNode1, bal_maskNode );
+ gap = cnt->gapLen;
+ weight = cnt->weight;
+ tmpCnt = getCntBetween ( remainNode, nextNode1 );
+
+ if ( tmpCnt )
+ {
+ inherit = 0;
+ }
+ else
+ {
+ inherit = 1;
+ }
+
+ if ( cnt->nextInScaf )
+ {
+ nextNode2 = cnt->nextInScaf->contigID;
+ bal_nextNode2 = getTwinCtg ( nextNode2 );
+ nextCnt = getCntBetween ( nextNode1, nextNode2 );
+ bal_nextCnt = getCntBetween ( bal_nextNode2, bal_nextNode1 );
+
+ if ( nextNode1 != remainNode && nextNode2 != remainNode )
+ {
+ tmpCnt = add1Connect ( remainNode, nextNode1, gap, weight, inherit );
+ bal_tmpCnt = add1Connect ( bal_nextNode1, bal_remainNode, gap, weight, inherit );
+ tmpCnt->nextInScaf = nextCnt;
+ tmpCnt->mask = 0;
+ tmpCnt->deleted = 0;
+ bal_nextCnt->nextInScaf = bal_tmpCnt;
+ bal_tmpCnt->prevInScaf = 1;
+ bal_tmpCnt->mask = 0;
+ bal_tmpCnt->deleted = 0;
+ }
+ else
+ {
+ nextCnt->prevInScaf = 0;
+ bal_nextCnt->nextInScaf = NULL;
+ }
+ }
+
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ cnt->mask = 1;
+ cnt->deleted = 1;
+ bal_cnt->nextInScaf = NULL;
+ bal_cnt->prevInScaf = 0;
+ bal_cnt->mask = 1;
+ bal_cnt->deleted = 1;
+ cnt = cnt->next;
+ }
}
/*************************************************
@@ -7253,39 +7853,39 @@ Return:
*************************************************/
static void maskNodeCnt ( unsigned int node )
{
- CONNECT * cnt = contig_array[node].downwardConnect;
- CONNECT * bal_cnt, *bal_nextCnt;
- unsigned int bal_nextNode1, bal_nextNode2;
-
- while ( cnt )
- {
- bal_nextNode1 = getTwinCtg ( cnt->contigID );
- bal_cnt = getCntBetween ( bal_nextNode1, getTwinCtg ( node ) );
-
- if ( cnt->nextInScaf )
- {
- cnt->nextInScaf->prevInScaf = 0;
- bal_nextNode2 = getTwinCtg ( cnt->nextInScaf->contigID );
- bal_nextCnt = getCntBetween ( bal_nextNode2, bal_nextNode1 );
-
- if ( !bal_nextCnt )
- {
- exit ( 1 );
- }
-
- bal_nextCnt->nextInScaf = NULL;
- }
-
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- cnt->mask = 1;
- cnt->deleted = 1;
- bal_cnt->nextInScaf = NULL;
- bal_cnt->prevInScaf = 0;
- bal_cnt->mask = 1;
- bal_cnt->deleted = 1;
- cnt = cnt->next;
- }
+ CONNECT *cnt = contig_array[node].downwardConnect;
+ CONNECT *bal_cnt, *bal_nextCnt;
+ unsigned int bal_nextNode1, bal_nextNode2;
+
+ while ( cnt )
+ {
+ bal_nextNode1 = getTwinCtg ( cnt->contigID );
+ bal_cnt = getCntBetween ( bal_nextNode1, getTwinCtg ( node ) );
+
+ if ( cnt->nextInScaf )
+ {
+ cnt->nextInScaf->prevInScaf = 0;
+ bal_nextNode2 = getTwinCtg ( cnt->nextInScaf->contigID );
+ bal_nextCnt = getCntBetween ( bal_nextNode2, bal_nextNode1 );
+
+ if ( !bal_nextCnt )
+ {
+ exit ( 1 );
+ }
+
+ bal_nextCnt->nextInScaf = NULL;
+ }
+
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ cnt->mask = 1;
+ cnt->deleted = 1;
+ bal_cnt->nextInScaf = NULL;
+ bal_cnt->prevInScaf = 0;
+ bal_cnt->mask = 1;
+ bal_cnt->deleted = 1;
+ cnt = cnt->next;
+ }
}
/*************************************************
@@ -7303,26 +7903,26 @@ Output:
Return:
None.
*************************************************/
-static void getEndKmers ( char * seq, int len, int rev, char * firstKmer, char * lastKmer )
+static void getEndKmers ( char *seq, int len, int rev, char *firstKmer, char *lastKmer )
{
- int j;
-
- if ( 0 == rev )
- {
- for ( j = 0; j < overlaplen; ++j )
- {
- firstKmer[j] = int2base ( ( int ) getCharInTightString ( seq, j ) );
- lastKmer[j] = int2base ( ( int ) getCharInTightString ( seq, len - j - 1 ) );
- }
- }
- else
- {
- for ( j = 0; j < overlaplen; ++j )
- {
- firstKmer[j] = int2compbase ( ( int ) getCharInTightString ( seq, len - j - 1 ) );
- lastKmer[j] = int2compbase ( ( int ) getCharInTightString ( seq, j ) );
- }
- }
+ int j;
+
+ if ( 0 == rev )
+ {
+ for ( j = 0; j < overlaplen; ++j )
+ {
+ firstKmer[j] = int2base ( ( int ) getCharInTightString ( seq, j ) );
+ lastKmer[j] = int2base ( ( int ) getCharInTightString ( seq, len - j - 1 ) );
+ }
+ }
+ else
+ {
+ for ( j = 0; j < overlaplen; ++j )
+ {
+ firstKmer[j] = int2compbase ( ( int ) getCharInTightString ( seq, len - j - 1 ) );
+ lastKmer[j] = int2compbase ( ( int ) getCharInTightString ( seq, j ) );
+ }
+ }
}
/*************************************************
@@ -7338,28 +7938,30 @@ Output:
Return:
None.
*************************************************/
-static void output_ctg ( unsigned int ctg, FILE * fo )
+static void output_ctg ( unsigned int ctg, FILE *fo )
{
- if ( contig_array[ctg].length < 1 )
- { return; }
-
- int len;
- unsigned int bal_ctg = getTwinCtg ( ctg );
- len = contig_array[ctg].length + overlaplen;
- int col = 0;
-
- if ( contig_array[ctg].seq )
- {
- fprintf ( fo, ">C%d %4.1f\n", ctg, ( double ) contig_array[ctg].cvg );
- outputTightStr ( fo, contig_array[ctg].seq, 0, len, len, 0, &col );
- }
- else if ( contig_array[bal_ctg].seq )
- {
- fprintf ( fo, ">C%d %4.1f\n", bal_ctg, ( double ) contig_array[ctg].cvg );
- outputTightStr ( fo, contig_array[bal_ctg].seq, 0, len, len, 0, &col );
- }
-
- fprintf ( fo, "\n" );
+ if ( contig_array[ctg].length < 1 )
+ {
+ return;
+ }
+
+ int len;
+ unsigned int bal_ctg = getTwinCtg ( ctg );
+ len = contig_array[ctg].length + overlaplen;
+ int col = 0;
+
+ if ( contig_array[ctg].seq )
+ {
+ fprintf ( fo, ">C%d %4.1f\n", ctg, ( double ) contig_array[ctg].cvg );
+ outputTightStr ( fo, contig_array[ctg].seq, 0, len, len, 0, &col );
+ }
+ else if ( contig_array[bal_ctg].seq )
+ {
+ fprintf ( fo, ">C%d %4.1f\n", bal_ctg, ( double ) contig_array[ctg].cvg );
+ outputTightStr ( fo, contig_array[bal_ctg].seq, 0, len, len, 0, &col );
+ }
+
+ fprintf ( fo, "\n" );
}
@@ -7382,127 +7984,127 @@ Return:
*************************************************/
static int removeBubbleCtg()
{
- int i, j, count, gap, SnpCounter = 0, conflict = 0;
- unsigned int node1, node2, bal_node1, bal_node2;
- int len1, len2, addLast = 0;
- char * tightStr1, *tightStr2;
- char firstKmer1[overlaplen + 1], lastKmer1[overlaplen + 1], firstKmer2[overlaplen + 1], lastKmer2[overlaplen + 1];
- CONNECT * cnt, *bal_cnt;
- count = 0;
-
- for ( i = 1; i < nodeCounter; ++i )
- {
- node1 = ctg4heapArray[i].ctgID;
- node2 = ctg4heapArray[i + 1].ctgID;
- bal_node1 = getTwinCtg ( node1 );
- bal_node2 = getTwinCtg ( node2 );
- cnt = getCntBetween ( node1, node2 );
- bal_cnt = getCntBetween ( node2, node1 );
- gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis - ( int ) contig_array[node2].length;
-
- if ( gap >= 0 || contig_array[node1].cvg >= cvg4SNP || contig_array[node2].cvg >= cvg4SNP || cnt || bal_cnt )
- {
- nodesInSubInOrder[count] = node1;
- nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
- continue;
- }
-
- len1 = contig_array[node1].length + overlaplen;
- len2 = contig_array[node2].length + overlaplen;
-
- if ( contig_array[node1].seq )
- {
- getEndKmers ( contig_array[node1].seq, len1, 0, firstKmer1, lastKmer1 );
- }
- else
- {
- getEndKmers ( contig_array[bal_node1].seq, len1, 1, firstKmer1, lastKmer1 );
- }
-
- if ( contig_array[node2].seq )
- {
- getEndKmers ( contig_array[node2].seq, len2, 0, firstKmer2, lastKmer2 );
- }
- else
- {
- getEndKmers ( contig_array[bal_node2].seq, len2, 1, firstKmer2, lastKmer2 );
- }
-
- for ( j = 0; j < overlaplen; ++j )
- {
- if ( firstKmer1[j] != firstKmer2[j] || lastKmer1[j] != lastKmer2[j] )
- {
- nodesInSubInOrder[count] = node1;
- nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
- conflict = 1;
- break;
- }
- }
-
- if ( 1 == conflict )
- {
- conflict = 0;
- continue;
- }
-
- ++SnpCounter;
-
- if ( contig_array[node1].bubbleInScaff == 0 || contig_array[node2].bubbleInScaff == 0 )
- {
- contig_array[node1].bubbleInScaff = 1;
- contig_array[bal_node1].bubbleInScaff = 1;
- contig_array[node2].bubbleInScaff = 1;
- contig_array[bal_node2].bubbleInScaff = 1;
- output_ctg ( node1, snp_fp );
- output_ctg ( node2, snp_fp );
- }
-
- if ( contig_array[node1].cvg > contig_array[node2].cvg || ( len1 > len2 && contig_array[node1].cvg == contig_array[node2].cvg ) )
- {
- if ( i == nodeCounter - 1 )
- {
- nodesInSubInOrder[count] = node1;
- nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
- addLast = 1;
- }
-
- transferCnt2RemainNode ( node2, node1 );
- transferCnt2RemainNode ( bal_node2, bal_node1 );
- contig_array[node2].mask = 1;
- contig_array[bal_node2].mask = 1;
- ctg4heapArray[i + 1].ctgID = node1;
- ctg4heapArray[i + 1].dis = ctg4heapArray[i].dis;
- }
- else
- {
- if ( i == nodeCounter - 1 )
- {
- nodesInSubInOrder[count] = node2;
- nodeDistanceInOrder[count++] = ctg4heapArray[i + 1].dis;
- addLast = 1;
- }
-
- transferCnt2RemainNode ( node1, node2 );
- transferCnt2RemainNode ( bal_node1, bal_node2 );
- contig_array[node1].mask = 1;
- contig_array[getTwinCtg ( node1 )].mask = 1;
- }
- }
-
- if ( 0 == addLast )
- {
- nodesInSubInOrder[count] = ctg4heapArray[nodeCounter].ctgID;
- nodeDistanceInOrder[count++] = ctg4heapArray[nodeCounter].dis;
- }
-
- for ( i = 0; i < count; ++i )
- {
- ctg4heapArray[i + 1].ctgID = nodesInSubInOrder[i];
- ctg4heapArray[i + 1].dis = nodeDistanceInOrder[i];
- }
-
- nodeCounter = count;
- return SnpCounter;
+ int i, j, count, gap, SnpCounter = 0, conflict = 0;
+ unsigned int node1, node2, bal_node1, bal_node2;
+ int len1, len2, addLast = 0;
+ char *tightStr1, *tightStr2;
+ char firstKmer1[overlaplen + 1], lastKmer1[overlaplen + 1], firstKmer2[overlaplen + 1], lastKmer2[overlaplen + 1];
+ CONNECT *cnt, *bal_cnt;
+ count = 0;
+
+ for ( i = 1; i < nodeCounter; ++i )
+ {
+ node1 = ctg4heapArray[i].ctgID;
+ node2 = ctg4heapArray[i + 1].ctgID;
+ bal_node1 = getTwinCtg ( node1 );
+ bal_node2 = getTwinCtg ( node2 );
+ cnt = getCntBetween ( node1, node2 );
+ bal_cnt = getCntBetween ( node2, node1 );
+ gap = ctg4heapArray[i + 1].dis - ctg4heapArray[i].dis - ( int ) contig_array[node2].length;
+
+ if ( gap >= 0 || contig_array[node1].cvg >= cvg4SNP || contig_array[node2].cvg >= cvg4SNP || cnt || bal_cnt )
+ {
+ nodesInSubInOrder[count] = node1;
+ nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
+ continue;
+ }
+
+ len1 = contig_array[node1].length + overlaplen;
+ len2 = contig_array[node2].length + overlaplen;
+
+ if ( contig_array[node1].seq )
+ {
+ getEndKmers ( contig_array[node1].seq, len1, 0, firstKmer1, lastKmer1 );
+ }
+ else
+ {
+ getEndKmers ( contig_array[bal_node1].seq, len1, 1, firstKmer1, lastKmer1 );
+ }
+
+ if ( contig_array[node2].seq )
+ {
+ getEndKmers ( contig_array[node2].seq, len2, 0, firstKmer2, lastKmer2 );
+ }
+ else
+ {
+ getEndKmers ( contig_array[bal_node2].seq, len2, 1, firstKmer2, lastKmer2 );
+ }
+
+ for ( j = 0; j < overlaplen; ++j )
+ {
+ if ( firstKmer1[j] != firstKmer2[j] || lastKmer1[j] != lastKmer2[j] )
+ {
+ nodesInSubInOrder[count] = node1;
+ nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
+ conflict = 1;
+ break;
+ }
+ }
+
+ if ( 1 == conflict )
+ {
+ conflict = 0;
+ continue;
+ }
+
+ ++SnpCounter;
+
+ if ( contig_array[node1].bubbleInScaff == 0 || contig_array[node2].bubbleInScaff == 0 )
+ {
+ contig_array[node1].bubbleInScaff = 1;
+ contig_array[bal_node1].bubbleInScaff = 1;
+ contig_array[node2].bubbleInScaff = 1;
+ contig_array[bal_node2].bubbleInScaff = 1;
+ output_ctg ( node1, snp_fp );
+ output_ctg ( node2, snp_fp );
+ }
+
+ if ( contig_array[node1].cvg > contig_array[node2].cvg || ( len1 > len2 && contig_array[node1].cvg == contig_array[node2].cvg ) )
+ {
+ if ( i == nodeCounter - 1 )
+ {
+ nodesInSubInOrder[count] = node1;
+ nodeDistanceInOrder[count++] = ctg4heapArray[i].dis;
+ addLast = 1;
+ }
+
+ transferCnt2RemainNode ( node2, node1 );
+ transferCnt2RemainNode ( bal_node2, bal_node1 );
+ contig_array[node2].mask = 1;
+ contig_array[bal_node2].mask = 1;
+ ctg4heapArray[i + 1].ctgID = node1;
+ ctg4heapArray[i + 1].dis = ctg4heapArray[i].dis;
+ }
+ else
+ {
+ if ( i == nodeCounter - 1 )
+ {
+ nodesInSubInOrder[count] = node2;
+ nodeDistanceInOrder[count++] = ctg4heapArray[i + 1].dis;
+ addLast = 1;
+ }
+
+ transferCnt2RemainNode ( node1, node2 );
+ transferCnt2RemainNode ( bal_node1, bal_node2 );
+ contig_array[node1].mask = 1;
+ contig_array[getTwinCtg ( node1 )].mask = 1;
+ }
+ }
+
+ if ( 0 == addLast )
+ {
+ nodesInSubInOrder[count] = ctg4heapArray[nodeCounter].ctgID;
+ nodeDistanceInOrder[count++] = ctg4heapArray[nodeCounter].dis;
+ }
+
+ for ( i = 0; i < count; ++i )
+ {
+ ctg4heapArray[i + 1].ctgID = nodesInSubInOrder[i];
+ ctg4heapArray[i + 1].dis = nodeDistanceInOrder[i];
+ }
+
+ nodeCounter = count;
+ return SnpCounter;
}
/*************************************************
@@ -7519,84 +8121,88 @@ Return:
*************************************************/
static void general_linearization ( boolean strict )
{
- unsigned int i;
- int subCounter = 0;
- int out_num;
- boolean flag;
- int conflCounter = 0, overlapCounter = 0, eligibleCounter = 0;
- int SNPCtgCounter = 0;
- double overlapTolerance, conflTolerance;
- canexchange = 0, exchange_num = 0, failexchange = 0;
- fprintf ( stderr, "Start to linearize sub-graph.\n" );
-
- for ( i = num_ctg; i > 0; i-- )
- {
- if ( contig_array[i].mask )
- { continue; }
-
- out_num = validConnect ( i, NULL );
-
- if ( out_num < 2 )
- { continue; }
-
- flag = pickUpGeneralSubgraph ( i, MaxNodeInSub );
-
- if ( !flag )
- {
- continue;
- }
-
- subCounter++;
- qsort ( &ctg4heapArray[1], nodeCounter, sizeof ( CTGinHEAP ), cmp_ctg );
-
- if ( Insert_size < 1000 && cvg4SNP > 0.001 )
- {
- SNPCtgCounter += removeBubbleCtg();
- }
-
- flag = checkEligible();
-
- if ( !flag )
- {
- eligibleCounter++;
- setInGraph ( 0 );
- continue;
- }
-
- if ( strict )
- {
- overlapTolerance = OverlapPercent;
- conflTolerance = ConflPercent;
- }
- else
- {
- overlapTolerance = 2 * OverlapPercent;
- conflTolerance = 2 * ConflPercent;
- }
-
- flag = checkOverlapInBetween_general ( overlapTolerance );
-
- if ( !flag )
- {
- overlapCounter++;
- setInGraph ( 0 );
- continue;
- }
-
- flag = checkConflictCnt_general ( conflTolerance );
-
- if ( flag )
- {
- conflCounter++;
- setInGraph ( 0 );
- continue;
- }
-
- arrangeNodes_general();
- setInGraph ( 0 );
- }
-
- fprintf ( stderr, " Picked sub-graphs %d\n Connection-conflict %d\n Significant overlapping %d\n Eligible %d\n Bubble structures %d\n", subCounter, conflCounter, overlapCounter, eligibleCounter, SNPCtgCounter );
+ unsigned int i;
+ int subCounter = 0;
+ int out_num;
+ boolean flag;
+ int conflCounter = 0, overlapCounter = 0, eligibleCounter = 0;
+ int SNPCtgCounter = 0;
+ double overlapTolerance, conflTolerance;
+ canexchange = 0, exchange_num = 0, failexchange = 0;
+ fprintf ( stderr, "Start to linearize sub-graph.\n" );
+
+ for ( i = num_ctg; i > 0; i-- )
+ {
+ if ( contig_array[i].mask )
+ {
+ continue;
+ }
+
+ out_num = validConnect ( i, NULL );
+
+ if ( out_num < 2 )
+ {
+ continue;
+ }
+
+ flag = pickUpGeneralSubgraph ( i, MaxNodeInSub );
+
+ if ( !flag )
+ {
+ continue;
+ }
+
+ subCounter++;
+ qsort ( &ctg4heapArray[1], nodeCounter, sizeof ( CTGinHEAP ), cmp_ctg );
+
+ if ( Insert_size < 1000 && cvg4SNP > 0.001 )
+ {
+ SNPCtgCounter += removeBubbleCtg();
+ }
+
+ flag = checkEligible();
+
+ if ( !flag )
+ {
+ eligibleCounter++;
+ setInGraph ( 0 );
+ continue;
+ }
+
+ if ( strict )
+ {
+ overlapTolerance = OverlapPercent;
+ conflTolerance = ConflPercent;
+ }
+ else
+ {
+ overlapTolerance = 2 * OverlapPercent;
+ conflTolerance = 2 * ConflPercent;
+ }
+
+ flag = checkOverlapInBetween_general ( overlapTolerance );
+
+ if ( !flag )
+ {
+ overlapCounter++;
+ setInGraph ( 0 );
+ continue;
+ }
+
+ flag = checkConflictCnt_general ( conflTolerance );
+
+ if ( flag )
+ {
+ conflCounter++;
+ setInGraph ( 0 );
+ continue;
+ }
+
+ arrangeNodes_general();
+ setInGraph ( 0 );
+ }
+
+ fprintf ( stderr, " Picked sub-graphs %d\n Connection-conflict %d\n Significant overlapping %d\n Eligible %d\n Bubble structures %d\n", subCounter, conflCounter, overlapCounter, eligibleCounter, SNPCtgCounter );
}
/**** the fowllowing codes for detecting and break down scaffold at weak point **********/
@@ -7617,80 +8223,92 @@ Return:
*************************************************/
static void smallScaf()
{
- unsigned int i, ctg, bal_ctg, prevCtg;
- int counter = 0;
- CONNECT * bindCnt, *cnt;
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
- { continue; }
-
- bindCnt = getBindCnt ( i );
-
- if ( !bindCnt )
- { continue; }
-
- counter++;
- contig_array[i].flag = 1;
- contig_array[getTwinCtg ( i )].flag = 1;
- prevCtg = getTwinCtg ( i );
-
- while ( bindCnt )
- {
- ctg = bindCnt->contigID;
- bal_ctg = getTwinCtg ( ctg );
- bindCnt->bySmall = 1;
- cnt = getCntBetween ( bal_ctg, prevCtg );
-
- if ( cnt )
- { cnt->bySmall = 1; }
-
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- prevCtg = bal_ctg;
- bindCnt = bindCnt->nextInScaf;
- }
-
- ctg = getTwinCtg ( i );
- bindCnt = getBindCnt ( ctg );
- prevCtg = i;
-
- while ( bindCnt )
- {
- ctg = bindCnt->contigID;
- bal_ctg = getTwinCtg ( ctg );
- bindCnt->bySmall = 1;
- cnt = getCntBetween ( bal_ctg, prevCtg );
-
- if ( cnt )
- { cnt->bySmall = 1; }
-
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- prevCtg = bal_ctg;
- bindCnt = bindCnt->nextInScaf;
- }
- }
-
- fprintf ( stderr, "Report from smallScaf: %d scaffolds by smallPE.\n", counter );
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( !contig_array[i].downwardConnect )
- { continue; }
-
- cnt = contig_array[i].downwardConnect;
-
- while ( cnt )
- {
- cnt->smallIns = 1;
- cnt = cnt->next;
- }
- }
+ unsigned int i, ctg, bal_ctg, prevCtg;
+ int counter = 0;
+ CONNECT *bindCnt, *cnt;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ bindCnt = getBindCnt ( i );
+
+ if ( !bindCnt )
+ {
+ continue;
+ }
+
+ counter++;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg ( i )].flag = 1;
+ prevCtg = getTwinCtg ( i );
+
+ while ( bindCnt )
+ {
+ ctg = bindCnt->contigID;
+ bal_ctg = getTwinCtg ( ctg );
+ bindCnt->bySmall = 1;
+ cnt = getCntBetween ( bal_ctg, prevCtg );
+
+ if ( cnt )
+ {
+ cnt->bySmall = 1;
+ }
+
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ prevCtg = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ ctg = getTwinCtg ( i );
+ bindCnt = getBindCnt ( ctg );
+ prevCtg = i;
+
+ while ( bindCnt )
+ {
+ ctg = bindCnt->contigID;
+ bal_ctg = getTwinCtg ( ctg );
+ bindCnt->bySmall = 1;
+ cnt = getCntBetween ( bal_ctg, prevCtg );
+
+ if ( cnt )
+ {
+ cnt->bySmall = 1;
+ }
+
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ prevCtg = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+ }
+
+ fprintf ( stderr, "Report from smallScaf: %d scaffolds by smallPE.\n", counter );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ cnt = contig_array[i].downwardConnect;
+
+ while ( cnt )
+ {
+ cnt->smallIns = 1;
+ cnt = cnt->next;
+ }
+ }
}
/*************************************************
@@ -7707,27 +8325,27 @@ Return:
*************************************************/
static void clearNewInsFlag()
{
- int i = 1;
- CONNECT * cnt;
-
- for ( ; i <= num_ctg; i++ )
- {
- cnt = contig_array[i].downwardConnect;
-
- while ( cnt )
- {
- cnt->newIns = 0;
-
- if ( Insert_size > 15000 )
- {
- cnt = cnt->next;
- continue;
- }
-
- cnt->maxGap = 0;
- cnt = cnt->next;
- }
- }
+ int i = 1;
+ CONNECT *cnt;
+
+ for ( ; i <= num_ctg; i++ )
+ {
+ cnt = contig_array[i].downwardConnect;
+
+ while ( cnt )
+ {
+ cnt->newIns = 0;
+
+ if ( Insert_size > 15000 )
+ {
+ cnt = cnt->next;
+ continue;
+ }
+
+ cnt->maxGap = 0;
+ cnt = cnt->next;
+ }
+ }
}
/*************************************************
@@ -7747,28 +8365,28 @@ Output:
Return:
1 if operation of putting succeeded.
*************************************************/
-static boolean putItem2Sarray ( unsigned int scaf, int wt, DARRAY * SCAF, DARRAY * WT, int counter )
+static boolean putItem2Sarray ( unsigned int scaf, int wt, DARRAY *SCAF, DARRAY *WT, int counter )
{
- int i;
- unsigned int * scafP, *wtP;
-
- for ( i = 0; i < counter; i++ )
- {
- scafP = ( unsigned int * ) darrayGet ( SCAF, i );
-
- if ( ( *scafP ) == scaf )
- {
- wtP = ( unsigned int * ) darrayGet ( WT, i );
- *wtP = ( *wtP + wt );
- return 0;
- }
- }
-
- scafP = ( unsigned int * ) darrayPut ( SCAF, counter );
- wtP = ( unsigned int * ) darrayPut ( WT, counter );
- *scafP = scaf;
- *wtP = wt;
- return 1;
+ int i;
+ unsigned int *scafP, *wtP;
+
+ for ( i = 0; i < counter; i++ )
+ {
+ scafP = ( unsigned int * ) darrayGet ( SCAF, i );
+
+ if ( ( *scafP ) == scaf )
+ {
+ wtP = ( unsigned int * ) darrayGet ( WT, i );
+ *wtP = ( *wtP + wt );
+ return 0;
+ }
+ }
+
+ scafP = ( unsigned int * ) darrayPut ( SCAF, counter );
+ wtP = ( unsigned int * ) darrayPut ( WT, counter );
+ *scafP = scaf;
+ *wtP = wt;
+ return 1;
}
/*************************************************
@@ -7790,66 +8408,68 @@ Output:
Return:
Number of other scaffolds being connected.
*************************************************/
-static int getDSLink2Scaf ( STACK * scafStack, DARRAY * SCAF, DARRAY * WT, int total_len )
+static int getDSLink2Scaf ( STACK *scafStack, DARRAY *SCAF, DARRAY *WT, int total_len )
{
- CONNECT * ite_cnt;
- CONNECT * bind_cnt;
- unsigned int ctg, targetCtg, bal_targetCtg, *pt;
- int counter = 0;
- int len = 0, gap;
- boolean inc;
- stackRecover ( scafStack );
-
- while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
- {
- ctg = *pt;
- bind_cnt = getBindCnt ( ctg );
- gap = bind_cnt ? bind_cnt->gapLen : 0;
- len += contig_array[ctg].length + gap;
-
- if ( ( contig_array[ctg].mask && contig_array[ctg].length < 500 ) || !contig_array[ctg].downwardConnect
- || total_len - len > Insert_size )
- {
- continue;
- }
-
- ite_cnt = contig_array[ctg].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->newIns != 1 )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- targetCtg = ite_cnt->contigID;
- bal_targetCtg = getTwinCtg ( targetCtg );
-
- if ( ( ite_cnt->mask && contig_array[targetCtg].length < 500 ) || ite_cnt->singleInScaf
- || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- if ( contig_array[ctg].from_vt == contig_array[targetCtg].from_vt // on the same scaff
- || ( targetCtg == contig_array[targetCtg].from_vt && bal_targetCtg == contig_array[bal_targetCtg].from_vt ) ) //targetCtg isn't in any scaffold
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- inc = putItem2Sarray ( contig_array[targetCtg].from_vt, ite_cnt->weight, SCAF, WT, counter );
-
- if ( inc )
- { counter++; }
-
- ite_cnt = ite_cnt->next;
- }
- }
-
- return counter;
+ CONNECT *ite_cnt;
+ CONNECT *bind_cnt;
+ unsigned int ctg, targetCtg, bal_targetCtg, *pt;
+ int counter = 0;
+ int len = 0, gap;
+ boolean inc;
+ stackRecover ( scafStack );
+
+ while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
+ {
+ ctg = *pt;
+ bind_cnt = getBindCnt ( ctg );
+ gap = bind_cnt ? bind_cnt->gapLen : 0;
+ len += contig_array[ctg].length + gap;
+
+ if ( ( contig_array[ctg].mask && contig_array[ctg].length < 500 ) || !contig_array[ctg].downwardConnect
+ || total_len - len > Insert_size )
+ {
+ continue;
+ }
+
+ ite_cnt = contig_array[ctg].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->newIns != 1 )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ targetCtg = ite_cnt->contigID;
+ bal_targetCtg = getTwinCtg ( targetCtg );
+
+ if ( ( ite_cnt->mask && contig_array[targetCtg].length < 500 ) || ite_cnt->singleInScaf
+ || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ if ( contig_array[ctg].from_vt == contig_array[targetCtg].from_vt // on the same scaff
+ || ( targetCtg == contig_array[targetCtg].from_vt && bal_targetCtg == contig_array[bal_targetCtg].from_vt ) ) //targetCtg isn't in any scaffold
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ inc = putItem2Sarray ( contig_array[targetCtg].from_vt, ite_cnt->weight, SCAF, WT, counter );
+
+ if ( inc )
+ {
+ counter++;
+ }
+
+ ite_cnt = ite_cnt->next;
+ }
+ }
+
+ return counter;
}
/*************************************************
@@ -7865,26 +8485,26 @@ Output:
Return:
Length of contig chain.
*************************************************/
-static int getScaffold ( unsigned int start, STACK * scafStack )
+static int getScaffold ( unsigned int start, STACK *scafStack )
{
- int len = contig_array[start].length;
- unsigned int * pt, ctg;
- emptyStack ( scafStack );
- pt = ( unsigned int * ) stackPush ( scafStack );
- *pt = start;
- CONNECT * bindCnt = getBindCnt ( start );
-
- while ( bindCnt )
- {
- ctg = bindCnt->contigID;
- pt = ( unsigned int * ) stackPush ( scafStack );
- *pt = ctg;
- len += bindCnt->gapLen + contig_array[ctg].length;
- bindCnt = bindCnt->nextInScaf;
- }
-
- stackBackup ( scafStack );
- return len;
+ int len = contig_array[start].length;
+ unsigned int *pt, ctg;
+ emptyStack ( scafStack );
+ pt = ( unsigned int * ) stackPush ( scafStack );
+ *pt = start;
+ CONNECT *bindCnt = getBindCnt ( start );
+
+ while ( bindCnt )
+ {
+ ctg = bindCnt->contigID;
+ pt = ( unsigned int * ) stackPush ( scafStack );
+ *pt = ctg;
+ len += bindCnt->gapLen + contig_array[ctg].length;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ stackBackup ( scafStack );
+ return len;
}
/*************************************************
@@ -7900,15 +8520,17 @@ Output:
Return:
1 if a reliable connection was found.
*************************************************/
-static boolean isLinkReliable ( DARRAY * WT, int count )
+static boolean isLinkReliable ( DARRAY *WT, int count )
{
- int i;
+ int i;
- for ( i = 0; i < count; i++ )
- if ( * ( int * ) darrayGet ( WT, i ) >= weakPE )
- { return 1; }
+ for ( i = 0; i < count; i++ )
+ if ( * ( int * ) darrayGet ( WT, i ) >= weakPE )
+ {
+ return 1;
+ }
- return 0;
+ return 0;
}
/*************************************************
@@ -7927,15 +8549,17 @@ Return:
Weight of connection was found.
0 otherwise.
*************************************************/
-static int getWtFromSarray ( DARRAY * SCAF, DARRAY * WT, int count, unsigned int scaf )
+static int getWtFromSarray ( DARRAY *SCAF, DARRAY *WT, int count, unsigned int scaf )
{
- int i;
+ int i;
- for ( i = 0; i < count; i++ )
- if ( * ( unsigned int * ) darrayGet ( SCAF, i ) == scaf )
- { return * ( int * ) darrayGet ( WT, i ); }
+ for ( i = 0; i < count; i++ )
+ if ( * ( unsigned int * ) darrayGet ( SCAF, i ) == scaf )
+ {
+ return * ( int * ) darrayGet ( WT, i );
+ }
- return 0;
+ return 0;
}
/*************************************************
@@ -7950,13 +8574,15 @@ Output:
Return:
None.
*************************************************/
-static void switch2twin ( STACK * scafStack )
+static void switch2twin ( STACK *scafStack )
{
- unsigned int * pt;
- stackRecover ( scafStack );
+ unsigned int *pt;
+ stackRecover ( scafStack );
- while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
- { *pt = getTwinCtg ( *pt ); }
+ while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
+ {
+ *pt = getTwinCtg ( *pt );
+ }
}
/*************************************************
@@ -7972,75 +8598,75 @@ Output:
Return:
None.
*************************************************/
-static void recoverLinks ( STACK * scafStack )
+static void recoverLinks ( STACK *scafStack )
{
- CONNECT * ite_cnt;
- unsigned int ctg, targetCtg, *pt;
- int counter = 0;
- boolean inc;
- unsigned int bal_ctg;
- stackRecover ( scafStack );
-
- while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
- {
- ctg = *pt;
-
- if ( contig_array[ctg].mask || !contig_array[ctg].downwardConnect )
- {
- continue;
- }
-
- ite_cnt = contig_array[ctg].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->singleInScaf || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit || ite_cnt->weight < weakPE )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- targetCtg = ite_cnt->contigID;
-
- if ( contig_array[ctg].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- setConnectDelete ( ctg, targetCtg, 0, 0 );
- ite_cnt = ite_cnt->next;
- }
-
- bal_ctg = getTwinCtg ( ctg );
-
- if ( contig_array[bal_ctg].mask || !contig_array[bal_ctg].downwardConnect )
- {
- continue;
- }
-
- ite_cnt = contig_array[bal_ctg].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->singleInScaf || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit || ite_cnt->weight < weakPE )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- targetCtg = ite_cnt->contigID;
-
- if ( contig_array[bal_ctg].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- setConnectDelete ( bal_ctg, targetCtg, 0, 0 );
- ite_cnt = ite_cnt->next;
- }
- }
+ CONNECT *ite_cnt;
+ unsigned int ctg, targetCtg, *pt;
+ int counter = 0;
+ boolean inc;
+ unsigned int bal_ctg;
+ stackRecover ( scafStack );
+
+ while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
+ {
+ ctg = *pt;
+
+ if ( contig_array[ctg].mask || !contig_array[ctg].downwardConnect )
+ {
+ continue;
+ }
+
+ ite_cnt = contig_array[ctg].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->singleInScaf || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit || ite_cnt->weight < weakPE )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ targetCtg = ite_cnt->contigID;
+
+ if ( contig_array[ctg].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ setConnectDelete ( ctg, targetCtg, 0, 0 );
+ ite_cnt = ite_cnt->next;
+ }
+
+ bal_ctg = getTwinCtg ( ctg );
+
+ if ( contig_array[bal_ctg].mask || !contig_array[bal_ctg].downwardConnect )
+ {
+ continue;
+ }
+
+ ite_cnt = contig_array[bal_ctg].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->singleInScaf || ite_cnt->nextInScaf || ite_cnt->prevInScaf || ite_cnt->inherit || ite_cnt->weight < weakPE )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ targetCtg = ite_cnt->contigID;
+
+ if ( contig_array[bal_ctg].from_vt == contig_array[targetCtg].from_vt ) // on the same scaff
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ setConnectDelete ( bal_ctg, targetCtg, 0, 0 );
+ ite_cnt = ite_cnt->next;
+ }
+ }
}
/*
------>
@@ -8065,73 +8691,75 @@ Return:
0 if both sets of contigs did not have reliable connection
to different scaffolds.
*************************************************/
-static boolean checkScafConsist ( STACK * scafStack1, int len1, STACK * scafStack2, int len2 )
+static boolean checkScafConsist ( STACK *scafStack1, int len1, STACK *scafStack2, int len2 )
{
- DARRAY * downwardTo1 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) ); // scaf links to those scaffolds
- DARRAY * downwardTo2 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- DARRAY * downwardWt1 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) ); // scaf links to scaffolds with those wt
- DARRAY * downwardWt2 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
- int linkCount1 = getDSLink2Scaf ( scafStack1, downwardTo1, downwardWt1, len1 );
- int linkCount2 = getDSLink2Scaf ( scafStack2, downwardTo2, downwardWt2, len2 );
-
- if ( !linkCount1 )
- {
- freeDarray ( downwardTo1 );
- freeDarray ( downwardTo2 );
- freeDarray ( downwardWt1 );
- freeDarray ( downwardWt2 );
- return 1;
- }
-
- boolean flag1 = isLinkReliable ( downwardWt1, linkCount1 );
-
- if ( !flag1 )
- {
- freeDarray ( downwardTo1 );
- freeDarray ( downwardTo2 );
- freeDarray ( downwardWt1 );
- freeDarray ( downwardWt2 );
- return 1;
- }
-
- unsigned int scaf;
- int i, wt1, wt2, ret = 1;
-
- for ( i = 0; i < linkCount1; i++ )
- {
- wt1 = * ( int * ) darrayGet ( downwardWt1, i );
-
- if ( wt1 < weakPE )
- { continue; }
-
- scaf = * ( unsigned int * ) darrayGet ( downwardTo1, i );
- wt2 = getWtFromSarray ( downwardTo2, downwardWt2, linkCount2, scaf );
-
- if ( wt2 < 1 )
- {
- ret = 0;
- break;
- }
- }
-
- if ( ret == 0 )
- {
- if ( linkCount1 && flag1 )
- {
- recoverLinks ( scafStack1 );
- }
-
- if ( linkCount2 )
- {
- recoverLinks ( scafStack2 );
- }
- }
-
- freeDarray ( downwardTo1 );
- freeDarray ( downwardTo2 );
- freeDarray ( downwardWt1 );
- freeDarray ( downwardWt2 );
- return ret;
+ DARRAY *downwardTo1 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) ); // scaf links to those scaffolds
+ DARRAY *downwardTo2 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ DARRAY *downwardWt1 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) ); // scaf links to scaffolds with those wt
+ DARRAY *downwardWt2 = ( DARRAY * ) createDarray ( 1000, sizeof ( unsigned int ) );
+ int linkCount1 = getDSLink2Scaf ( scafStack1, downwardTo1, downwardWt1, len1 );
+ int linkCount2 = getDSLink2Scaf ( scafStack2, downwardTo2, downwardWt2, len2 );
+
+ if ( !linkCount1 )
+ {
+ freeDarray ( downwardTo1 );
+ freeDarray ( downwardTo2 );
+ freeDarray ( downwardWt1 );
+ freeDarray ( downwardWt2 );
+ return 1;
+ }
+
+ boolean flag1 = isLinkReliable ( downwardWt1, linkCount1 );
+
+ if ( !flag1 )
+ {
+ freeDarray ( downwardTo1 );
+ freeDarray ( downwardTo2 );
+ freeDarray ( downwardWt1 );
+ freeDarray ( downwardWt2 );
+ return 1;
+ }
+
+ unsigned int scaf;
+ int i, wt1, wt2, ret = 1;
+
+ for ( i = 0; i < linkCount1; i++ )
+ {
+ wt1 = * ( int * ) darrayGet ( downwardWt1, i );
+
+ if ( wt1 < weakPE )
+ {
+ continue;
+ }
+
+ scaf = * ( unsigned int * ) darrayGet ( downwardTo1, i );
+ wt2 = getWtFromSarray ( downwardTo2, downwardWt2, linkCount2, scaf );
+
+ if ( wt2 < 1 )
+ {
+ ret = 0;
+ break;
+ }
+ }
+
+ if ( ret == 0 )
+ {
+ if ( linkCount1 && flag1 )
+ {
+ recoverLinks ( scafStack1 );
+ }
+
+ if ( linkCount2 )
+ {
+ recoverLinks ( scafStack2 );
+ }
+ }
+
+ freeDarray ( downwardTo1 );
+ freeDarray ( downwardTo2 );
+ freeDarray ( downwardWt1 );
+ freeDarray ( downwardWt2 );
+ return ret;
}
/*************************************************
@@ -8150,46 +8778,54 @@ Output:
Return:
None.
*************************************************/
-static void setBreakPoints ( DARRAY * ctgArray, int count, int weakest,
- int * start, int * finish )
+static void setBreakPoints ( DARRAY *ctgArray, int count, int weakest,
+ int *start, int *finish )
{
- int index = weakest - 1;
- unsigned int thisCtg;
- unsigned int nextCtg = * ( unsigned int * ) darrayGet ( ctgArray, weakest );
- CONNECT * cnt;
- *start = weakest;
-
- while ( index >= 0 )
- {
- thisCtg = * ( unsigned int * ) darrayGet ( ctgArray, index );
- cnt = getCntBetween ( thisCtg, nextCtg );
-
- if ( cnt->maxGap > 2 )
- { break; }
- else
- { *start = index; }
-
- nextCtg = thisCtg;
- index--;
- }
-
- unsigned int prevCtg = * ( unsigned int * ) darrayGet ( ctgArray, weakest + 1 );
- *finish = weakest + 1;
- index = weakest + 2;
-
- while ( index < count )
- {
- thisCtg = * ( unsigned int * ) darrayGet ( ctgArray, index );
- cnt = getCntBetween ( prevCtg, thisCtg );
-
- if ( cnt->maxGap > 2 )
- { break; }
- else
- { *finish = index; }
-
- prevCtg = thisCtg;
- index++;
- }
+ int index = weakest - 1;
+ unsigned int thisCtg;
+ unsigned int nextCtg = * ( unsigned int * ) darrayGet ( ctgArray, weakest );
+ CONNECT *cnt;
+ *start = weakest;
+
+ while ( index >= 0 )
+ {
+ thisCtg = * ( unsigned int * ) darrayGet ( ctgArray, index );
+ cnt = getCntBetween ( thisCtg, nextCtg );
+
+ if ( cnt->maxGap > 2 )
+ {
+ break;
+ }
+ else
+ {
+ *start = index;
+ }
+
+ nextCtg = thisCtg;
+ index--;
+ }
+
+ unsigned int prevCtg = * ( unsigned int * ) darrayGet ( ctgArray, weakest + 1 );
+ *finish = weakest + 1;
+ index = weakest + 2;
+
+ while ( index < count )
+ {
+ thisCtg = * ( unsigned int * ) darrayGet ( ctgArray, index );
+ cnt = getCntBetween ( prevCtg, thisCtg );
+
+ if ( cnt->maxGap > 2 )
+ {
+ break;
+ }
+ else
+ {
+ *finish = index;
+ }
+
+ prevCtg = thisCtg;
+ index++;
+ }
}
/*************************************************
@@ -8205,18 +8841,18 @@ Output:
Return:
None.
*************************************************/
-static void changeScafEnd ( STACK * scafStack, unsigned int end )
+static void changeScafEnd ( STACK *scafStack, unsigned int end )
{
- unsigned int ctg, *pt;
- unsigned int start = getTwinCtg ( end );
- stackRecover ( scafStack );
-
- while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
- {
- ctg = *pt;
- contig_array[ctg].to_vt = end;
- contig_array[getTwinCtg ( ctg )].from_vt = start;
- }
+ unsigned int ctg, *pt;
+ unsigned int start = getTwinCtg ( end );
+ stackRecover ( scafStack );
+
+ while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
+ {
+ ctg = *pt;
+ contig_array[ctg].to_vt = end;
+ contig_array[getTwinCtg ( ctg )].from_vt = start;
+ }
}
/*************************************************
@@ -8233,18 +8869,18 @@ Output:
Return:
None.
*************************************************/
-static void changeScafBegin ( STACK * scafStack, unsigned int start )
+static void changeScafBegin ( STACK *scafStack, unsigned int start )
{
- unsigned int ctg, *pt;
- unsigned int end = getTwinCtg ( start );
- stackRecover ( scafStack );
-
- while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
- {
- ctg = *pt;
- contig_array[ctg].from_vt = start;
- contig_array[getTwinCtg ( ctg )].to_vt = end;
- }
+ unsigned int ctg, *pt;
+ unsigned int end = getTwinCtg ( start );
+ stackRecover ( scafStack );
+
+ while ( ( pt = ( unsigned int * ) stackPop ( scafStack ) ) != NULL )
+ {
+ ctg = *pt;
+ contig_array[ctg].from_vt = start;
+ contig_array[getTwinCtg ( ctg )].to_vt = end;
+ }
}
/*************************************************
@@ -8263,207 +8899,227 @@ Return:
*************************************************/
static void detectBreakScaf()
{
- unsigned int i, avgPE, scafLen, len, ctg, bal_ctg, prevCtg, thisCtg;
- long long peCounter, linkCounter;
- int num3, num5, weakPoint, tempCounter, j, t, counter = 0;
- CONNECT * bindCnt, *cnt, *weakCnt;
- STACK * scafStack1 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
- STACK * scafStack2 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
- { continue; }
-
- bindCnt = getBindCnt ( i );
-
- if ( !bindCnt )
- { continue; }
-
- //first scan to get the average coverage by longer pe
- num5 = num3 = peCounter = linkCounter = 0;
- scafLen = contig_array[i].length;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- contig_array[getTwinCtg ( i )].flag = 1;
-
- while ( bindCnt )
- {
- if ( !bindCnt->bySmall )
- { break; }
-
- linkCounter++;
- peCounter += bindCnt->maxGap;
- ctg = bindCnt->contigID;
- scafLen += contig_array[ctg].length;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- bindCnt = bindCnt->nextInScaf;
- }
-
- ctg = getTwinCtg ( i );
- bindCnt = getBindCnt ( ctg );
-
- while ( bindCnt )
- {
- if ( !bindCnt->bySmall )
- { break; }
-
- linkCounter++;
- peCounter += bindCnt->maxGap;
- ctg = bindCnt->contigID;
- scafLen += contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- bindCnt = bindCnt->nextInScaf;
- }
-
- if ( linkCounter < 1 || scafLen < 5000 )
- { continue; }
-
- avgPE = peCounter / linkCounter;
-
- if ( avgPE < 10 )
- { continue; }
-
- tempCounter = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf3, j );
-
- for ( j = 0; j < num5; j++ )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf5, j );
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, 0 );
- weakCnt = NULL;
- weakPoint = 0;
- len = contig_array[prevCtg].length;
-
- for ( t = 1; t < tempCounter; t++ )
- {
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
-
- if ( len < 2000 )
- {
- len += contig_array[thisCtg].length;
- prevCtg = thisCtg;
- continue;
- }
- else if ( len > scafLen - 2000 )
- { break; }
-
- len += contig_array[thisCtg].length;
-
- if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
- contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
- {
- prevCtg = thisCtg;
- continue;
- }
-
- cnt = getCntBetween ( prevCtg, thisCtg );
-
- if ( !weakCnt || weakCnt->maxGap > cnt->maxGap )
- {
- weakCnt = cnt;
- weakPoint = t;
- }
-
- prevCtg = thisCtg;
- }
-
- if ( !weakCnt || ( weakCnt->maxGap > 2 && weakCnt->maxGap > avgPE / 5 ) )
- { continue; }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint - 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint );
-
- if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
- contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
- {
- fprintf ( stderr, "contig %d and %d not on the same scaff\n", prevCtg, thisCtg );
- continue;
- }
-
- setConnectWP ( prevCtg, thisCtg, 1 );
- int index1, index2;
- setBreakPoints ( tempArray, tempCounter, weakPoint - 1, &index1, &index2 );
- unsigned int start = * ( unsigned int * ) darrayGet ( tempArray, index1 );
- unsigned int finish = * ( unsigned int * ) darrayGet ( tempArray, index2 );
- int len1 = getScaffold ( getTwinCtg ( start ), scafStack1 );
- int len2 = getScaffold ( finish, scafStack2 );
-
- if ( len1 < 2000 || len2 < 2000 )
- { continue; }
-
- switch2twin ( scafStack1 );
- int flag1 = checkScafConsist ( scafStack1, len1, scafStack2, len2 );
- switch2twin ( scafStack1 );
- switch2twin ( scafStack2 );
- int flag2 = checkScafConsist ( scafStack2, len2, scafStack1, len1 );
-
- if ( !flag1 || !flag2 )
- {
- changeScafBegin ( scafStack1, getTwinCtg ( start ) );
- changeScafEnd ( scafStack2, getTwinCtg ( finish ) );
- //unbind links
- unsigned int nextCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 + 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
- cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
-
- if ( cnt->nextInScaf )
- {
- prevCtg = getTwinCtg ( cnt->nextInScaf->contigID );
- cnt->nextInScaf->prevInScaf = 0;
- cnt = getCntBetween ( prevCtg, thisCtg );
- cnt->nextInScaf = NULL;
- }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 - 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 );
- cnt = getCntBetween ( prevCtg, thisCtg );
-
- if ( cnt->nextInScaf )
- {
- nextCtg = cnt->nextInScaf->contigID;
- cnt->nextInScaf->prevInScaf = 0;
- cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
- cnt->nextInScaf = NULL;
- }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
-
- for ( t = index1 + 1; t <= index2; t++ )
- {
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
- cnt = getCntBetween ( prevCtg, thisCtg );
- cnt->mask = 1;
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- cnt = getCntBetween ( getTwinCtg ( thisCtg ), getTwinCtg ( prevCtg ) );
- cnt->mask = 1;
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- prevCtg = thisCtg;
- }
-
- counter++;
- }
- }
-
- freeStack ( scafStack1 );
- freeStack ( scafStack2 );
- fprintf ( stderr, "Report from checkScaf: %d scaffold segments broken.\n", counter );
+ unsigned int i, avgPE, scafLen, len, ctg, bal_ctg, prevCtg, thisCtg;
+ long long peCounter, linkCounter;
+ int num3, num5, weakPoint, tempCounter, j, t, counter = 0;
+ CONNECT *bindCnt, *cnt, *weakCnt;
+ STACK *scafStack1 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
+ STACK *scafStack2 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ bindCnt = getBindCnt ( i );
+
+ if ( !bindCnt )
+ {
+ continue;
+ }
+
+ //first scan to get the average coverage by longer pe
+ num5 = num3 = peCounter = linkCounter = 0;
+ scafLen = contig_array[i].length;
+ ctg = i;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg ( i )].flag = 1;
+
+ while ( bindCnt )
+ {
+ if ( !bindCnt->bySmall )
+ {
+ break;
+ }
+
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += contig_array[ctg].length;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ ctg = getTwinCtg ( i );
+ bindCnt = getBindCnt ( ctg );
+
+ while ( bindCnt )
+ {
+ if ( !bindCnt->bySmall )
+ {
+ break;
+ }
+
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += contig_array[ctg].length;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ if ( linkCounter < 1 || scafLen < 5000 )
+ {
+ continue;
+ }
+
+ avgPE = peCounter / linkCounter;
+
+ if ( avgPE < 10 )
+ {
+ continue;
+ }
+
+ tempCounter = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf3, j );
+
+ for ( j = 0; j < num5; j++ )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf5, j );
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, 0 );
+ weakCnt = NULL;
+ weakPoint = 0;
+ len = contig_array[prevCtg].length;
+
+ for ( t = 1; t < tempCounter; t++ )
+ {
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
+
+ if ( len < 2000 )
+ {
+ len += contig_array[thisCtg].length;
+ prevCtg = thisCtg;
+ continue;
+ }
+ else if ( len > scafLen - 2000 )
+ {
+ break;
+ }
+
+ len += contig_array[thisCtg].length;
+
+ if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
+ {
+ prevCtg = thisCtg;
+ continue;
+ }
+
+ cnt = getCntBetween ( prevCtg, thisCtg );
+
+ if ( !weakCnt || weakCnt->maxGap > cnt->maxGap )
+ {
+ weakCnt = cnt;
+ weakPoint = t;
+ }
+
+ prevCtg = thisCtg;
+ }
+
+ if ( !weakCnt || ( weakCnt->maxGap > 2 && weakCnt->maxGap > avgPE / 5 ) )
+ {
+ continue;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint - 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint );
+
+ if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
+ {
+ fprintf ( stderr, "contig %d and %d not on the same scaff\n", prevCtg, thisCtg );
+ continue;
+ }
+
+ setConnectWP ( prevCtg, thisCtg, 1 );
+ int index1, index2;
+ setBreakPoints ( tempArray, tempCounter, weakPoint - 1, &index1, &index2 );
+ unsigned int start = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+ unsigned int finish = * ( unsigned int * ) darrayGet ( tempArray, index2 );
+ int len1 = getScaffold ( getTwinCtg ( start ), scafStack1 );
+ int len2 = getScaffold ( finish, scafStack2 );
+
+ if ( len1 < 2000 || len2 < 2000 )
+ {
+ continue;
+ }
+
+ switch2twin ( scafStack1 );
+ int flag1 = checkScafConsist ( scafStack1, len1, scafStack2, len2 );
+ switch2twin ( scafStack1 );
+ switch2twin ( scafStack2 );
+ int flag2 = checkScafConsist ( scafStack2, len2, scafStack1, len1 );
+
+ if ( !flag1 || !flag2 )
+ {
+ changeScafBegin ( scafStack1, getTwinCtg ( start ) );
+ changeScafEnd ( scafStack2, getTwinCtg ( finish ) );
+ //unbind links
+ unsigned int nextCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 + 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+ cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
+
+ if ( cnt->nextInScaf )
+ {
+ prevCtg = getTwinCtg ( cnt->nextInScaf->contigID );
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween ( prevCtg, thisCtg );
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 - 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 );
+ cnt = getCntBetween ( prevCtg, thisCtg );
+
+ if ( cnt->nextInScaf )
+ {
+ nextCtg = cnt->nextInScaf->contigID;
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+
+ for ( t = index1 + 1; t <= index2; t++ )
+ {
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
+ cnt = getCntBetween ( prevCtg, thisCtg );
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ cnt = getCntBetween ( getTwinCtg ( thisCtg ), getTwinCtg ( prevCtg ) );
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ prevCtg = thisCtg;
+ }
+
+ counter++;
+ }
+ }
+
+ freeStack ( scafStack1 );
+ freeStack ( scafStack2 );
+ fprintf ( stderr, "Report from checkScaf: %d scaffold segments broken.\n", counter );
}
@@ -8483,219 +9139,235 @@ Return:
*************************************************/
static void detectBreakScaff()
{
- unsigned int i, avgPE, scafLen, len, ctg, bal_ctg, prevCtg, thisCtg;
- long long peCounter, linkCounter;
- int num3, num5, weakPoint, tempCounter, j, t, counter = 0;
- int newInsNum;
- CONNECT * bindCnt, *cnt, *weakCnt;
- CONNECT * bal_cnt;
- STACK * scafStack1 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
- STACK * scafStack2 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
-
- for ( i = 1; i <= num_ctg; i++ )
- { contig_array[i].flag = 0; }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
- { continue; }
-
- bindCnt = getBindCnt ( i );
-
- if ( !bindCnt )
- { continue; }
-
- //first scan to get the average coverage by longer pe
- num5 = num3 = peCounter = linkCounter = 0;
- scafLen = contig_array[i].length;
- ctg = i;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
- contig_array[i].flag = 1;
- contig_array[getTwinCtg ( i )].flag = 1;
-
- while ( bindCnt )
- {
- linkCounter++;
- peCounter += bindCnt->maxGap;
- ctg = bindCnt->contigID;
- scafLen += bindCnt->gapLen + contig_array[ctg].length;
- * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- bindCnt = bindCnt->nextInScaf;
- }
-
- ctg = getTwinCtg ( i );
- bindCnt = getBindCnt ( ctg );
-
- while ( bindCnt )
- {
- linkCounter++;
- peCounter += bindCnt->maxGap;
- ctg = bindCnt->contigID;
- scafLen += bindCnt->gapLen + contig_array[ctg].length;
- bal_ctg = getTwinCtg ( ctg );
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
- * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
- bindCnt = bindCnt->nextInScaf;
- }
-
- if ( scafLen < Insert_size )
- { continue; }
-
- avgPE = peCounter / linkCounter;
-
- if ( avgPE < 10 )
- { continue; }
-
- tempCounter = 0;
-
- for ( j = num3 - 1; j >= 0; j-- )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf3, j );
-
- for ( j = 0; j < num5; j++ )
- * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
- * ( unsigned int * ) darrayGet ( scaf5, j );
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, 0 );
- weakCnt = NULL;
- weakPoint = 0;
- len = contig_array[prevCtg].length;
-
- for ( t = 1; t < tempCounter; t++ )
- {
- newInsNum = 0;
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
- cnt = contig_array[thisCtg].downwardConnect;
-
- while ( cnt )
- {
- if ( cnt->newIns == 1 )
- {
- ctg = cnt->contigID;
- bal_ctg = getTwinCtg ( ctg );
- newInsNum++;
- }
-
- cnt = cnt->next;
- }
-
- bal_cnt = contig_array[getTwinCtg ( thisCtg )].downwardConnect;
- cnt = getCntBetween ( prevCtg, thisCtg );
-
- if ( len < Insert_size )
- {
- len += cnt->gapLen + contig_array[thisCtg].length;
- prevCtg = thisCtg;
- continue;
- }
- else if ( len > scafLen - Insert_size )
- { break; }
-
- len += cnt->gapLen + contig_array[thisCtg].length;
-
- if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
- contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
- {
- prevCtg = thisCtg;
- continue;
- }
-
- if ( !weakCnt || weakCnt->maxGap > cnt->maxGap )
- {
- weakCnt = cnt;
- weakPoint = t;
- }
-
- prevCtg = thisCtg;
- }
-
- if ( !weakCnt || ( weakCnt->maxGap > 2 && weakCnt->maxGap > avgPE / 5 ) )
- { continue; }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint - 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint );
-
- if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
- contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
- {
- printf ( "contig %d and %d not on the same scaff\n", prevCtg, thisCtg );
- continue;
- }
-
- setConnectWP ( prevCtg, thisCtg, 1 );
- // set start and end to break down the scaffold
- int index1, index2;
- setBreakPoints ( tempArray, tempCounter, weakPoint - 1, &index1, &index2 );
- unsigned int start = * ( unsigned int * ) darrayGet ( tempArray, index1 );
- unsigned int finish = * ( unsigned int * ) darrayGet ( tempArray, index2 );
- int len1 = getScaffold ( getTwinCtg ( start ), scafStack1 );
- int len2 = getScaffold ( finish, scafStack2 );
-
- if ( len1 < Insert_size || len2 < Insert_size )
- { continue; }
-
- switch2twin ( scafStack1 );
- int flag1 = checkScafConsist ( scafStack1, len1, scafStack2, len2 );
- switch2twin ( scafStack1 );
- switch2twin ( scafStack2 );
- int flag2 = checkScafConsist ( scafStack2, len2, scafStack1, len1 );
-
- if ( !flag1 || !flag2 )
- {
- changeScafBegin ( scafStack1, getTwinCtg ( start ) );
- changeScafEnd ( scafStack2, getTwinCtg ( finish ) );
- //unbind links
- unsigned int nextCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 + 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
- cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
-
- if ( cnt->nextInScaf )
- {
- prevCtg = getTwinCtg ( cnt->nextInScaf->contigID );
- cnt->nextInScaf->prevInScaf = 0;
- cnt = getCntBetween ( prevCtg, thisCtg );
- cnt->nextInScaf = NULL;
- }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 - 1 );
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 );
- cnt = getCntBetween ( prevCtg, thisCtg );
-
- if ( cnt->nextInScaf )
- {
- nextCtg = cnt->nextInScaf->contigID;
- cnt->nextInScaf->prevInScaf = 0;
- cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
- cnt->nextInScaf = NULL;
- }
-
- prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
-
- for ( t = index1 + 1; t <= index2; t++ )
- {
- thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
- cnt = getCntBetween ( prevCtg, thisCtg );
- cnt->mask = 1;
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- cnt = getCntBetween ( getTwinCtg ( thisCtg ), getTwinCtg ( prevCtg ) );
- cnt->mask = 1;
- cnt->nextInScaf = NULL;
- cnt->prevInScaf = 0;
- prevCtg = thisCtg;
- }
-
- counter++;
- }
- }
-
- freeStack ( scafStack1 );
- freeStack ( scafStack2 );
- fprintf ( stderr, "Report from checkScaf: %d scaffold segments broken.\n", counter );
+ unsigned int i, avgPE, scafLen, len, ctg, bal_ctg, prevCtg, thisCtg;
+ long long peCounter, linkCounter;
+ int num3, num5, weakPoint, tempCounter, j, t, counter = 0;
+ int newInsNum;
+ CONNECT *bindCnt, *cnt, *weakCnt;
+ CONNECT *bal_cnt;
+ STACK *scafStack1 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
+ STACK *scafStack2 = ( STACK * ) createStack ( 1000, sizeof ( unsigned int ) );
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ contig_array[i].flag = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( contig_array[i].flag || contig_array[i].mask || !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
+
+ bindCnt = getBindCnt ( i );
+
+ if ( !bindCnt )
+ {
+ continue;
+ }
+
+ //first scan to get the average coverage by longer pe
+ num5 = num3 = peCounter = linkCounter = 0;
+ scafLen = contig_array[i].length;
+ ctg = i;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = i;
+ contig_array[i].flag = 1;
+ contig_array[getTwinCtg ( i )].flag = 1;
+
+ while ( bindCnt )
+ {
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += bindCnt->gapLen + contig_array[ctg].length;
+ * ( unsigned int * ) darrayPut ( scaf5, num5++ ) = ctg;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ ctg = getTwinCtg ( i );
+ bindCnt = getBindCnt ( ctg );
+
+ while ( bindCnt )
+ {
+ linkCounter++;
+ peCounter += bindCnt->maxGap;
+ ctg = bindCnt->contigID;
+ scafLen += bindCnt->gapLen + contig_array[ctg].length;
+ bal_ctg = getTwinCtg ( ctg );
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+ * ( unsigned int * ) darrayPut ( scaf3, num3++ ) = bal_ctg;
+ bindCnt = bindCnt->nextInScaf;
+ }
+
+ if ( scafLen < Insert_size )
+ {
+ continue;
+ }
+
+ avgPE = peCounter / linkCounter;
+
+ if ( avgPE < 10 )
+ {
+ continue;
+ }
+
+ tempCounter = 0;
+
+ for ( j = num3 - 1; j >= 0; j-- )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf3, j );
+
+ for ( j = 0; j < num5; j++ )
+ * ( unsigned int * ) darrayPut ( tempArray, tempCounter++ ) =
+ * ( unsigned int * ) darrayGet ( scaf5, j );
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, 0 );
+ weakCnt = NULL;
+ weakPoint = 0;
+ len = contig_array[prevCtg].length;
+
+ for ( t = 1; t < tempCounter; t++ )
+ {
+ newInsNum = 0;
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
+ cnt = contig_array[thisCtg].downwardConnect;
+
+ while ( cnt )
+ {
+ if ( cnt->newIns == 1 )
+ {
+ ctg = cnt->contigID;
+ bal_ctg = getTwinCtg ( ctg );
+ newInsNum++;
+ }
+
+ cnt = cnt->next;
+ }
+
+ bal_cnt = contig_array[getTwinCtg ( thisCtg )].downwardConnect;
+ cnt = getCntBetween ( prevCtg, thisCtg );
+
+ if ( len < Insert_size )
+ {
+ len += cnt->gapLen + contig_array[thisCtg].length;
+ prevCtg = thisCtg;
+ continue;
+ }
+ else if ( len > scafLen - Insert_size )
+ {
+ break;
+ }
+
+ len += cnt->gapLen + contig_array[thisCtg].length;
+
+ if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
+ {
+ prevCtg = thisCtg;
+ continue;
+ }
+
+ if ( !weakCnt || weakCnt->maxGap > cnt->maxGap )
+ {
+ weakCnt = cnt;
+ weakPoint = t;
+ }
+
+ prevCtg = thisCtg;
+ }
+
+ if ( !weakCnt || ( weakCnt->maxGap > 2 && weakCnt->maxGap > avgPE / 5 ) )
+ {
+ continue;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint - 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, weakPoint );
+
+ if ( contig_array[prevCtg].from_vt != contig_array[thisCtg].from_vt ||
+ contig_array[prevCtg].indexInScaf > contig_array[thisCtg].indexInScaf )
+ {
+ printf ( "contig %d and %d not on the same scaff\n", prevCtg, thisCtg );
+ continue;
+ }
+
+ setConnectWP ( prevCtg, thisCtg, 1 );
+ // set start and end to break down the scaffold
+ int index1, index2;
+ setBreakPoints ( tempArray, tempCounter, weakPoint - 1, &index1, &index2 );
+ unsigned int start = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+ unsigned int finish = * ( unsigned int * ) darrayGet ( tempArray, index2 );
+ int len1 = getScaffold ( getTwinCtg ( start ), scafStack1 );
+ int len2 = getScaffold ( finish, scafStack2 );
+
+ if ( len1 < Insert_size || len2 < Insert_size )
+ {
+ continue;
+ }
+
+ switch2twin ( scafStack1 );
+ int flag1 = checkScafConsist ( scafStack1, len1, scafStack2, len2 );
+ switch2twin ( scafStack1 );
+ switch2twin ( scafStack2 );
+ int flag2 = checkScafConsist ( scafStack2, len2, scafStack1, len1 );
+
+ if ( !flag1 || !flag2 )
+ {
+ changeScafBegin ( scafStack1, getTwinCtg ( start ) );
+ changeScafEnd ( scafStack2, getTwinCtg ( finish ) );
+ //unbind links
+ unsigned int nextCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 + 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+ cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
+
+ if ( cnt->nextInScaf )
+ {
+ prevCtg = getTwinCtg ( cnt->nextInScaf->contigID );
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween ( prevCtg, thisCtg );
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 - 1 );
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, index2 );
+ cnt = getCntBetween ( prevCtg, thisCtg );
+
+ if ( cnt->nextInScaf )
+ {
+ nextCtg = cnt->nextInScaf->contigID;
+ cnt->nextInScaf->prevInScaf = 0;
+ cnt = getCntBetween ( getTwinCtg ( nextCtg ), getTwinCtg ( thisCtg ) );
+ cnt->nextInScaf = NULL;
+ }
+
+ prevCtg = * ( unsigned int * ) darrayGet ( tempArray, index1 );
+
+ for ( t = index1 + 1; t <= index2; t++ )
+ {
+ thisCtg = * ( unsigned int * ) darrayGet ( tempArray, t );
+ cnt = getCntBetween ( prevCtg, thisCtg );
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ cnt = getCntBetween ( getTwinCtg ( thisCtg ), getTwinCtg ( prevCtg ) );
+ cnt->mask = 1;
+ cnt->nextInScaf = NULL;
+ cnt->prevInScaf = 0;
+ prevCtg = thisCtg;
+ }
+
+ counter++;
+ }
+ }
+
+ freeStack ( scafStack1 );
+ freeStack ( scafStack2 );
+ fprintf ( stderr, "Report from checkScaf: %d scaffold segments broken.\n", counter );
}
/*************************************************
@@ -8712,30 +9384,32 @@ Output:
Return:
1 if no contig appeared more than once.
*************************************************/
-static boolean checkSimple ( DARRAY * ctgArray, int count )
+static boolean checkSimple ( DARRAY *ctgArray, int count )
{
- int i;
- unsigned int ctg;
-
- for ( i = 0; i < count; i++ )
- {
- ctg = * ( unsigned int * ) darrayGet ( ctgArray, i );
- contig_array[ctg].flag = 0;
- contig_array[getTwinCtg ( ctg )].flag = 0;
- }
-
- for ( i = 0; i < count; i++ )
- {
- ctg = * ( unsigned int * ) darrayGet ( ctgArray, i );
-
- if ( contig_array[ctg].flag )
- { return 0; }
-
- contig_array[ctg].flag = 1;
- contig_array[getTwinCtg ( ctg )].flag = 1;
- }
-
- return 1;
+ int i;
+ unsigned int ctg;
+
+ for ( i = 0; i < count; i++ )
+ {
+ ctg = * ( unsigned int * ) darrayGet ( ctgArray, i );
+ contig_array[ctg].flag = 0;
+ contig_array[getTwinCtg ( ctg )].flag = 0;
+ }
+
+ for ( i = 0; i < count; i++ )
+ {
+ ctg = * ( unsigned int * ) darrayGet ( ctgArray, i );
+
+ if ( contig_array[ctg].flag )
+ {
+ return 0;
+ }
+
+ contig_array[ctg].flag = 1;
+ contig_array[getTwinCtg ( ctg )].flag = 1;
+ }
+
+ return 1;
}
/*************************************************
@@ -8753,34 +9427,34 @@ Return:
*************************************************/
static void checkCircle()
{
- unsigned int i, ctg;
- CONNECT * cn_temp1;
- int counter = 0;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- cn_temp1 = contig_array[i].downwardConnect;
-
- while ( cn_temp1 )
- {
- if ( cn_temp1->weak || cn_temp1->deleted )
- {
- cn_temp1 = cn_temp1->next;
- continue;
- }
-
- ctg = cn_temp1->contigID;
-
- if ( checkConnect ( ctg, i ) )
- {
- counter++;
- maskContig ( i, 1 );
- maskContig ( ctg, 1 );
- }
-
- cn_temp1 = cn_temp1->next;
- }
- }
-
- fprintf ( stderr, "%d circles removed.\n", counter );
+ unsigned int i, ctg;
+ CONNECT *cn_temp1;
+ int counter = 0;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ cn_temp1 = contig_array[i].downwardConnect;
+
+ while ( cn_temp1 )
+ {
+ if ( cn_temp1->weak || cn_temp1->deleted )
+ {
+ cn_temp1 = cn_temp1->next;
+ continue;
+ }
+
+ ctg = cn_temp1->contigID;
+
+ if ( checkConnect ( ctg, i ) )
+ {
+ counter++;
+ maskContig ( i, 1 );
+ maskContig ( ctg, 1 );
+ }
+
+ cn_temp1 = cn_temp1->next;
+ }
+ }
+
+ fprintf ( stderr, "%d circles removed.\n", counter );
}
diff --git a/standardPregraph/output_contig.c b/standardPregraph/output_contig.c
index de5d0c5..5180245 100644
--- a/standardPregraph/output_contig.c
+++ b/standardPregraph/output_contig.c
@@ -1,7 +1,7 @@
/*
* output_contig.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -25,106 +25,106 @@
#include "kmerhash.h"
#include "extfunc.h"
#include "extvab.h"
-static char * kmerSeq;
-static unsigned int * flag_array;
-void output_graph ( char * outfile )
+static char *kmerSeq;
+static unsigned int *flag_array;
+void output_graph ( char *outfile )
{
- char name[256];
- FILE * fp;
- unsigned int i, bal_i;
- sprintf ( name, "%s.edge.gvz", outfile );
- fp = ckopen ( name, "w" );
- fprintf ( fp, "digraph G{\n" );
- fprintf ( fp, "\tsize=\"512,512\";\n" );
-
- for ( i = num_ed; i > 0; i-- )
- {
- if ( edge_array[i].deleted )
- {
- continue;
- }
-
- /*
- arcCount(i,&arcNum);
- if(arcNum<1)
- continue;
- */
- bal_i = getTwinEdge ( i );
- /*
- arcCount(bal_i,&arcNum);
- if(arcNum<1)
- continue;
- */
- fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", edge_array[i].from_vt, edge_array[i].to_vt, i, edge_array[i].length );
- }
-
- fprintf ( fp, "}\n" );
- fclose ( fp );
+ char name[256];
+ FILE *fp;
+ unsigned int i, bal_i;
+ sprintf ( name, "%s.edge.gvz", outfile );
+ fp = ckopen ( name, "w" );
+ fprintf ( fp, "digraph G{\n" );
+ fprintf ( fp, "\tsize=\"512,512\";\n" );
+
+ for ( i = num_ed; i > 0; i-- )
+ {
+ if ( edge_array[i].deleted )
+ {
+ continue;
+ }
+
+ /*
+ arcCount(i,&arcNum);
+ if(arcNum<1)
+ continue;
+ */
+ bal_i = getTwinEdge ( i );
+ /*
+ arcCount(bal_i,&arcNum);
+ if(arcNum<1)
+ continue;
+ */
+ fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", edge_array[i].from_vt, edge_array[i].to_vt, i, edge_array[i].length );
+ }
+
+ fprintf ( fp, "}\n" );
+ fclose ( fp );
}
-static void output_1contig ( int id, EDGE * edge, FILE * fp, boolean tip )
+static void output_1contig ( int id, EDGE *edge, FILE *fp, boolean tip )
{
- int i;
- Kmer kmer;
- fprintf ( fp, ">%d length %d cvg_%.1f_tip_%d\n", id, edge->length + overlaplen, ( double ) edge->cvg / 10, tip );
- //fprintf(fp,">%d\n",id);
- kmer = vt_array[edge->from_vt].kmer;
- printKmerSeq ( fp, kmer );
-
- for ( i = 0; i < edge->length; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( edge->seq, i ) ) );
-
- if ( ( i + overlaplen + 1 ) % 100 == 0 )
- {
- fprintf ( fp, "\n" );
- }
- }
-
- if ( ( edge->length + overlaplen ) % 100 != 0 )
- {
- fprintf ( fp, "\n" );
- }
+ int i;
+ Kmer kmer;
+ fprintf ( fp, ">%d length %d cvg_%.1f_tip_%d\n", id, edge->length + overlaplen, ( double ) edge->cvg / 10, tip );
+ //fprintf(fp,">%d\n",id);
+ kmer = vt_array[edge->from_vt].kmer;
+ printKmerSeq ( fp, kmer );
+
+ for ( i = 0; i < edge->length; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( edge->seq, i ) ) );
+
+ if ( ( i + overlaplen + 1 ) % 100 == 0 )
+ {
+ fprintf ( fp, "\n" );
+ }
+ }
+
+ if ( ( edge->length + overlaplen ) % 100 != 0 )
+ {
+ fprintf ( fp, "\n" );
+ }
}
-int cmp_int ( const void * a, const void * b )
+int cmp_int ( const void *a, const void *b )
{
- int * A, *B;
- A = ( int * ) a;
- B = ( int * ) b;
-
- if ( *A > *B )
- {
- return 1;
- }
- else if ( *A == *B )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ int *A, *B;
+ A = ( int * ) a;
+ B = ( int * ) b;
+
+ if ( *A > *B )
+ {
+ return 1;
+ }
+ else if ( *A == *B )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
-int cmp_edge ( const void * a, const void * b )
+int cmp_edge ( const void *a, const void *b )
{
- EDGE * A, *B;
- A = ( EDGE * ) a;
- B = ( EDGE * ) b;
-
- if ( A->length > B->length )
- {
- return 1;
- }
- else if ( A->length == B->length )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ EDGE *A, *B;
+ A = ( EDGE * ) a;
+ B = ( EDGE * ) b;
+
+ if ( A->length > B->length )
+ {
+ return 1;
+ }
+ else if ( A->length == B->length )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
/*************************************************
@@ -144,175 +144,177 @@ Output:
Return:
None.
*************************************************/
-void output_contig ( EDGE * ed_array, unsigned int ed_num, char * outfile, int cut_len )
+void output_contig ( EDGE *ed_array, unsigned int ed_num, char *outfile, int cut_len )
{
- char temp[256];
- FILE * fp, *fp_contig;
- int flag, count, len_c;
- int signI;
- unsigned int i, j, diff_len = 0;
- long long sum = 0, N90, N50;
- unsigned int * length_array;
- boolean tip;
- sprintf ( temp, "%s.contig", outfile );
- fp = ckopen ( temp, "w" );
- unsigned int * all_length_arr = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
- index_array = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
- flag_array = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
-
- for ( i = 1; i <= ed_num; ++i )
- {
- index_array[i] = ed_array[i].length;
- all_length_arr[i] = ed_array[i].length;
- }
-
- qsort ( &all_length_arr[1], ed_num, sizeof ( all_length_arr[0] ), cmp_int );
-
- for ( i = 1; i <= ed_num; ++i )
- {
- for ( j = i + 1; j <= ed_num; ++j )
- {
- if ( all_length_arr[i] != all_length_arr[j] )
- { break; }
- }
-
- all_length_arr[++diff_len] = all_length_arr[i];
- flag_array[diff_len] = i;
- i = j - 1;
- }
-
- for ( i = 1; i <= ed_num; ++i )
- {
- index_array[i] = uniqueLenSearch ( all_length_arr, flag_array, diff_len, index_array[i] );
- }
-
- for ( i = 1; i <= ed_num; ++i )
- {
- flag_array[index_array[i]] = i;
- }
-
- free ( ( void * ) all_length_arr );
- length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
- kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
- //first scan for number counting
- count = len_c = 0;
-
- for ( i = 1; i <= ed_num; i++ )
- {
- if ( ( ed_array[i].length + overlaplen ) >= len_bar )
- {
- length_array[len_c++] = ed_array[i].length + overlaplen;
- }
-
- if ( ed_array[i].length < 1 || ed_array[i].deleted )
- {
- continue;
- }
-
- count++;
-
- if ( EdSmallerThanTwin ( i ) )
- {
- i++;
- }
- }
-
- sum = 0;
-
- for ( signI = len_c - 1; signI >= 0; signI-- )
- {
- sum += length_array[signI];
- }
-
- qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );
-
- if ( len_c > 0 )
- {
- fprintf ( stderr, "\nThere are %d contig(s) longer than %d, sum up %lld bp, with average length %lld.\n", len_c, len_bar, sum, sum / len_c );
- fprintf ( stderr, "The longest length is %d bp, ", length_array[len_c - 1] );
- }
- else
- {
- fprintf ( stderr, "No contig was constructed!\n" );
- }
-
- N50 = sum * 0.5;
- N90 = sum * 0.9;
- sum = flag = 0;
-
- for ( signI = len_c - 1; signI >= 0; signI-- )
- {
- sum += length_array[signI];
-
- if ( !flag && sum >= N50 )
- {
- fprintf ( stderr, "contig N50 is %d bp,", length_array[signI] );
- flag = 1;
- }
-
- if ( sum >= N90 )
- {
- fprintf ( stderr, "contig N90 is %d bp.\n", length_array[signI] );
- break;
- }
- }
-
- for ( i = 1; i <= ed_num; i++ )
- {
- j = flag_array[i];
-
- if ( ed_array[j].deleted || ed_array[j].length < 1 )
- {
- continue;
- }
-
- if ( ed_array[j].arcs && ed_array[getTwinEdge ( j )].arcs )
- {
- tip = 0;
- }
- else
- {
- tip = 1;
- }
-
- output_1contig ( i, & ( ed_array[j] ), fp, tip );
-
- if ( EdSmallerThanTwin ( j ) )
- {
- i++;
- }
- }
-
- fclose ( fp );
- free ( ( void * ) kmerSeq );
- free ( ( void * ) length_array );
- fprintf ( stderr, "%d contig(s) longer than %d output.\n", count, cut_len );
- sprintf ( temp, "%s.ContigIndex", outfile );
- fp_contig = ckopen ( temp, "w" );
- fprintf ( fp_contig, "Edge_num %d %d\n", ed_num, count );
- fprintf ( fp_contig, "index\tlength\treverseComplement\n" );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- j = flag_array[i];
- fprintf ( fp_contig, "%d\t%d\t", i, edge_array[j].length + overlaplen );
-
- if ( EdSmallerThanTwin ( j ) )
- {
- fprintf ( fp_contig, "1\n" );
- ++i;
- }
- else if ( EdLargerThanTwin ( j ) )
- {
- fprintf ( fp_contig, "-1\n" );
- }
- else
- {
- fprintf ( fp_contig, "0\n" );
- }
- }
-
- fclose ( fp_contig );
+ char temp[256];
+ FILE *fp, *fp_contig;
+ int flag, count, len_c;
+ int signI;
+ unsigned int i, j, diff_len = 0;
+ long long sum = 0, N90, N50;
+ unsigned int *length_array;
+ boolean tip;
+ sprintf ( temp, "%s.contig", outfile );
+ fp = ckopen ( temp, "w" );
+ unsigned int *all_length_arr = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
+ index_array = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
+ flag_array = ( unsigned int * ) ckalloc ( ( ed_num + 1 ) * sizeof ( unsigned int ) );
+
+ for ( i = 1; i <= ed_num; ++i )
+ {
+ index_array[i] = ed_array[i].length;
+ all_length_arr[i] = ed_array[i].length;
+ }
+
+ qsort ( &all_length_arr[1], ed_num, sizeof ( all_length_arr[0] ), cmp_int );
+
+ for ( i = 1; i <= ed_num; ++i )
+ {
+ for ( j = i + 1; j <= ed_num; ++j )
+ {
+ if ( all_length_arr[i] != all_length_arr[j] )
+ {
+ break;
+ }
+ }
+
+ all_length_arr[++diff_len] = all_length_arr[i];
+ flag_array[diff_len] = i;
+ i = j - 1;
+ }
+
+ for ( i = 1; i <= ed_num; ++i )
+ {
+ index_array[i] = uniqueLenSearch ( all_length_arr, flag_array, diff_len, index_array[i] );
+ }
+
+ for ( i = 1; i <= ed_num; ++i )
+ {
+ flag_array[index_array[i]] = i;
+ }
+
+ free ( ( void * ) all_length_arr );
+ length_array = ( unsigned int * ) ckalloc ( ed_num * sizeof ( unsigned int ) );
+ kmerSeq = ( char * ) ckalloc ( overlaplen * sizeof ( char ) );
+ //first scan for number counting
+ count = len_c = 0;
+
+ for ( i = 1; i <= ed_num; i++ )
+ {
+ if ( ( ed_array[i].length + overlaplen ) >= len_bar )
+ {
+ length_array[len_c++] = ed_array[i].length + overlaplen;
+ }
+
+ if ( ed_array[i].length < 1 || ed_array[i].deleted )
+ {
+ continue;
+ }
+
+ count++;
+
+ if ( EdSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ sum = 0;
+
+ for ( signI = len_c - 1; signI >= 0; signI-- )
+ {
+ sum += length_array[signI];
+ }
+
+ qsort ( length_array, len_c, sizeof ( length_array[0] ), cmp_int );
+
+ if ( len_c > 0 )
+ {
+ fprintf ( stderr, "\nThere are %d contig(s) longer than %d, sum up %lld bp, with average length %lld.\n", len_c, len_bar, sum, sum / len_c );
+ fprintf ( stderr, "The longest length is %d bp, ", length_array[len_c - 1] );
+ }
+ else
+ {
+ fprintf ( stderr, "No contig was constructed!\n" );
+ }
+
+ N50 = sum * 0.5;
+ N90 = sum * 0.9;
+ sum = flag = 0;
+
+ for ( signI = len_c - 1; signI >= 0; signI-- )
+ {
+ sum += length_array[signI];
+
+ if ( !flag && sum >= N50 )
+ {
+ fprintf ( stderr, "contig N50 is %d bp,", length_array[signI] );
+ flag = 1;
+ }
+
+ if ( sum >= N90 )
+ {
+ fprintf ( stderr, "contig N90 is %d bp.\n", length_array[signI] );
+ break;
+ }
+ }
+
+ for ( i = 1; i <= ed_num; i++ )
+ {
+ j = flag_array[i];
+
+ if ( ed_array[j].deleted || ed_array[j].length < 1 )
+ {
+ continue;
+ }
+
+ if ( ed_array[j].arcs && ed_array[getTwinEdge ( j )].arcs )
+ {
+ tip = 0;
+ }
+ else
+ {
+ tip = 1;
+ }
+
+ output_1contig ( i, & ( ed_array[j] ), fp, tip );
+
+ if ( EdSmallerThanTwin ( j ) )
+ {
+ i++;
+ }
+ }
+
+ fclose ( fp );
+ free ( ( void * ) kmerSeq );
+ free ( ( void * ) length_array );
+ fprintf ( stderr, "%d contig(s) longer than %d output.\n", count, cut_len );
+ sprintf ( temp, "%s.ContigIndex", outfile );
+ fp_contig = ckopen ( temp, "w" );
+ fprintf ( fp_contig, "Edge_num %d %d\n", ed_num, count );
+ fprintf ( fp_contig, "index\tlength\treverseComplement\n" );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ j = flag_array[i];
+ fprintf ( fp_contig, "%d\t%d\t", i, edge_array[j].length + overlaplen );
+
+ if ( EdSmallerThanTwin ( j ) )
+ {
+ fprintf ( fp_contig, "1\n" );
+ ++i;
+ }
+ else if ( EdLargerThanTwin ( j ) )
+ {
+ fprintf ( fp_contig, "-1\n" );
+ }
+ else
+ {
+ fprintf ( fp_contig, "0\n" );
+ }
+ }
+
+ fclose ( fp_contig );
}
/*************************************************
@@ -328,52 +330,57 @@ Return:
None.
*************************************************/
-void output_updated_edges ( char * outfile )
+void output_updated_edges ( char *outfile )
{
- FILE * fp;
- char name[256];
- unsigned int i, j, validCounter = 0;
- EDGE * edge;
- sprintf ( name, "%s.updated.edge", outfile );
- fp = ckopen ( name, "w" );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- validCounter++;
- }
-
- fprintf ( fp, "EDGEs %d\n", validCounter );
- validCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- j = flag_array[i];
- edge = &edge_array[j];
-
- if ( edge->length != 0 )
- { fprintf ( fp, ">length %d,", edge->length + overlaplen ); }
- else { fprintf ( fp, ">length %d,", edge->length ); }
-
- if ( EdSmallerThanTwin ( j ) )
- {
- fprintf ( fp, "1," );
- }
- else if ( EdLargerThanTwin ( j ) )
- {
- fprintf ( fp, "-1," );
- }
- else
- {
- fprintf ( fp, "0," );
- }
-
- fprintf ( fp, "%d,", edge->cvg );
- print_kmer ( fp, vt_array[edge->from_vt].kmer, ',' );
- print_kmer ( fp, vt_array[edge->to_vt].kmer, ',' );
- fprintf ( fp, "\n" );
- }
-
- fclose ( fp );
+ FILE *fp;
+ char name[256];
+ unsigned int i, j, validCounter = 0;
+ EDGE *edge;
+ sprintf ( name, "%s.updated.edge", outfile );
+ fp = ckopen ( name, "w" );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ validCounter++;
+ }
+
+ fprintf ( fp, "EDGEs %d\n", validCounter );
+ validCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ j = flag_array[i];
+ edge = &edge_array[j];
+
+ if ( edge->length != 0 )
+ {
+ fprintf ( fp, ">length %d,", edge->length + overlaplen );
+ }
+ else
+ {
+ fprintf ( fp, ">length %d,", edge->length );
+ }
+
+ if ( EdSmallerThanTwin ( j ) )
+ {
+ fprintf ( fp, "1," );
+ }
+ else if ( EdLargerThanTwin ( j ) )
+ {
+ fprintf ( fp, "-1," );
+ }
+ else
+ {
+ fprintf ( fp, "0," );
+ }
+
+ fprintf ( fp, "%d,", edge->cvg );
+ print_kmer ( fp, vt_array[edge->from_vt].kmer, ',' );
+ print_kmer ( fp, vt_array[edge->to_vt].kmer, ',' );
+ fprintf ( fp, "\n" );
+ }
+
+ fclose ( fp );
}
/*************************************************
@@ -388,45 +395,45 @@ Output:
Return:
None.
*************************************************/
-void output_heavyArcs ( char * outfile )
+void output_heavyArcs ( char *outfile )
{
- unsigned int i, j;
- char name[256];
- FILE * outfp;
- ARC * parc;
- sprintf ( name, "%s.Arc", outfile );
- outfp = ckopen ( name, "w" );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- parc = edge_array[flag_array[i]].arcs;
-
- if ( !parc )
- {
- continue;
- }
-
- j = 0;
- fprintf ( outfp, "%u", i );
-
- while ( parc )
- {
- fprintf ( outfp, " %u %u", index_array[parc->to_ed], parc->multiplicity );
-
- if ( ( ++j ) % 10 == 0 )
- {
- fprintf ( outfp, "\n%u", i );
- }
-
- parc = parc->next;
- }
-
- fprintf ( outfp, "\n" );
- }
-
- fclose ( outfp );
- free ( ( void * ) index_array );
- free ( ( void * ) flag_array );
+ unsigned int i, j;
+ char name[256];
+ FILE *outfp;
+ ARC *parc;
+ sprintf ( name, "%s.Arc", outfile );
+ outfp = ckopen ( name, "w" );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ parc = edge_array[flag_array[i]].arcs;
+
+ if ( !parc )
+ {
+ continue;
+ }
+
+ j = 0;
+ fprintf ( outfp, "%u", i );
+
+ while ( parc )
+ {
+ fprintf ( outfp, " %u %u", index_array[parc->to_ed], parc->multiplicity );
+
+ if ( ( ++j ) % 10 == 0 )
+ {
+ fprintf ( outfp, "\n%u", i );
+ }
+
+ parc = parc->next;
+ }
+
+ fprintf ( outfp, "\n" );
+ }
+
+ fclose ( outfp );
+ free ( ( void * ) index_array );
+ free ( ( void * ) flag_array );
}
diff --git a/standardPregraph/output_pregraph.c b/standardPregraph/output_pregraph.c
index f553885..2fff596 100644
--- a/standardPregraph/output_pregraph.c
+++ b/standardPregraph/output_pregraph.c
@@ -1,7 +1,7 @@
/*
* 31mer/output_pregraph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -28,85 +28,85 @@
static int outvCounter = 0;
//after this LINKFLAGFILTER in the Kmer is destroyed
-static void output1vt ( kmer_t * node1, FILE * fp )
+static void output1vt ( kmer_t *node1, FILE *fp )
{
- if ( !node1 )
- {
- return;
- }
+ if ( !node1 )
+ {
+ return;
+ }
- if ( ! ( node1->linear ) && ! ( node1->deleted ) )
- {
- outvCounter++;
- print_kmer ( fp, node1->seq, ' ' );
+ if ( ! ( node1->linear ) && ! ( node1->deleted ) )
+ {
+ outvCounter++;
+ print_kmer ( fp, node1->seq, ' ' );
- if ( outvCounter % 8 == 0 )
- {
- fprintf ( fp, "\n" );
- }
- }
+ if ( outvCounter % 8 == 0 )
+ {
+ fprintf ( fp, "\n" );
+ }
+ }
}
-void output_vertex ( char * outfile )
+void output_vertex ( char *outfile )
{
- char temp[256];
- FILE * fp;
- int i;
- kmer_t * node;
- KmerSet * set;
- sprintf ( temp, "%s.vertex", outfile );
- fp = ckopen ( temp, "w" );
+ char temp[256];
+ FILE *fp;
+ int i;
+ kmer_t *node;
+ KmerSet *set;
+ sprintf ( temp, "%s.vertex", outfile );
+ fp = ckopen ( temp, "w" );
- for ( i = 0; i < thrd_num; i++ )
- {
- set = KmerSets[i];
- set->iter_ptr = 0;
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ set = KmerSets[i];
+ set->iter_ptr = 0;
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- node = set->array + set->iter_ptr;
- output1vt ( node, fp );
- }
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ node = set->array + set->iter_ptr;
+ output1vt ( node, fp );
+ }
- set->iter_ptr++;
- }
- }
+ set->iter_ptr++;
+ }
+ }
- fprintf ( fp, "\n" );
- fprintf ( stderr, "%d vertex(es) output.\n", outvCounter );
- fclose ( fp );
- sprintf ( temp, "%s.preGraphBasic", outfile );
- fp = ckopen ( temp, "w" );
- fprintf ( fp, "VERTEX %d K %d\n", outvCounter, overlaplen );
- fprintf ( fp, "\nEDGEs %d\n", num_ed );
- fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxReadLen4all, minReadLen, maxNameLen );
- fclose ( fp );
+ fprintf ( fp, "\n" );
+ fprintf ( stderr, "%d vertex(es) output.\n", outvCounter );
+ fclose ( fp );
+ sprintf ( temp, "%s.preGraphBasic", outfile );
+ fp = ckopen ( temp, "w" );
+ fprintf ( fp, "VERTEX %d K %d\n", outvCounter, overlaplen );
+ fprintf ( fp, "\nEDGEs %d\n", num_ed );
+ fprintf ( fp, "\nMaxReadLen %d MinReadLen %d MaxNameLen %d\n", maxReadLen4all, minReadLen, maxNameLen );
+ fclose ( fp );
}
-void output_1edge ( preEDGE * edge, gzFile * fp )
+void output_1edge ( preEDGE *edge, gzFile *fp )
{
- int i;
- gzprintf ( fp, ">length %d,", edge->length );
- print_kmer_gz ( fp, edge->from_node, ',' );
- print_kmer_gz ( fp, edge->to_node, ',' );
- gzprintf ( fp, "cvg %d, %d\n", edge->cvg, edge->bal_edge );
+ int i;
+ gzprintf ( fp, ">length %d,", edge->length );
+ print_kmer_gz ( fp, edge->from_node, ',' );
+ print_kmer_gz ( fp, edge->to_node, ',' );
+ gzprintf ( fp, "cvg %d, %d\n", edge->cvg, edge->bal_edge );
- for ( i = 0; i < edge->length; i++ )
- {
- gzprintf ( fp, "%c", int2base ( ( int ) edge->seq[i] ) );
+ for ( i = 0; i < edge->length; i++ )
+ {
+ gzprintf ( fp, "%c", int2base ( ( int ) edge->seq[i] ) );
- if ( ( i + 1 ) % 100 == 0 )
- {
- gzprintf ( fp, "\n" );
- }
- }
+ if ( ( i + 1 ) % 100 == 0 )
+ {
+ gzprintf ( fp, "\n" );
+ }
+ }
- if ( edge->length % 100 != 0 )
- {
- gzprintf ( fp, "\n" );
- }
+ if ( edge->length % 100 != 0 )
+ {
+ gzprintf ( fp, "\n" );
+ }
}
diff --git a/standardPregraph/output_scaffold.c b/standardPregraph/output_scaffold.c
index ada9486..a145541 100644
--- a/standardPregraph/output_scaffold.c
+++ b/standardPregraph/output_scaffold.c
@@ -1,7 +1,7 @@
/*
* output_scaffold.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,75 +26,75 @@
#include "extfunc.h"
#include "extvab.h"
-void output_contig_graph ( char * outfile )
+void output_contig_graph ( char *outfile )
{
- char name[256];
- FILE * fp;
- unsigned int i;
- sprintf ( name, "%s.contig.gvz", outfile );
- fp = ckopen ( name, "w" );
- fprintf ( fp, "digraph G{\n" );
- fprintf ( fp, "\tsize=\"512,512\";\n" );
+ char name[256];
+ FILE *fp;
+ unsigned int i;
+ sprintf ( name, "%s.contig.gvz", outfile );
+ fp = ckopen ( name, "w" );
+ fprintf ( fp, "digraph G{\n" );
+ fprintf ( fp, "\tsize=\"512,512\";\n" );
- for ( i = num_ctg; i > 0; i-- )
- {
- fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", contig_array[i].from_vt, contig_array[i].to_vt, i, contig_array[i].length );
- }
+ for ( i = num_ctg; i > 0; i-- )
+ {
+ fprintf ( fp, "\tV%d -> V%d[label =\"%d(%d)\"];\n", contig_array[i].from_vt, contig_array[i].to_vt, i, contig_array[i].length );
+ }
- fprintf ( fp, "}\n" );
- fclose ( fp );
+ fprintf ( fp, "}\n" );
+ fclose ( fp );
}
-void output_scaf ( char * outfile )
+void output_scaf ( char *outfile )
{
- char name[256];
- FILE * fp;
- unsigned int i;
- CONNECT * connect;
- boolean flag;
- sprintf ( name, "%s.scaffold.gvz", outfile );
- fp = ckopen ( name, "w" );
- fprintf ( fp, "digraph G{\n" );
- fprintf ( fp, "\tsize=\"512,512\";\n" );
+ char name[256];
+ FILE *fp;
+ unsigned int i;
+ CONNECT *connect;
+ boolean flag;
+ sprintf ( name, "%s.scaffold.gvz", outfile );
+ fp = ckopen ( name, "w" );
+ fprintf ( fp, "digraph G{\n" );
+ fprintf ( fp, "\tsize=\"512,512\";\n" );
- for ( i = num_ctg; i > 0; i-- )
- {
- //if(contig_array[i].mask||!contig_array[i].downwardConnect)
- if ( !contig_array[i].downwardConnect )
- {
- continue;
- }
+ for ( i = num_ctg; i > 0; i-- )
+ {
+ //if(contig_array[i].mask||!contig_array[i].downwardConnect)
+ if ( !contig_array[i].downwardConnect )
+ {
+ continue;
+ }
- connect = contig_array[i].downwardConnect;
+ connect = contig_array[i].downwardConnect;
- while ( connect )
- {
- //if(connect->mask||connect->deleted){
- if ( connect->deleted )
- {
- connect = connect->next;
- continue;
- }
+ while ( connect )
+ {
+ //if(connect->mask||connect->deleted){
+ if ( connect->deleted )
+ {
+ connect = connect->next;
+ continue;
+ }
- if ( connect->prevInScaf || connect->nextInScaf )
- {
- flag = 1;
- }
- else
- {
- flag = 0;
- }
+ if ( connect->prevInScaf || connect->nextInScaf )
+ {
+ flag = 1;
+ }
+ else
+ {
+ flag = 0;
+ }
- if ( !connect->mask )
- fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
- connect->gapLen, flag, connect->weight );
- else
- fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
- connect->gapLen, flag, connect->weight );
+ if ( !connect->mask )
+ fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\"];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+ connect->gapLen, flag, connect->weight );
+ else
+ fprintf ( fp, "\tC%d_%d -> C%d_%d [label = \"%d(%d_%d)\", color = red];\n", i, contig_array[i].length, connect->contigID, contig_array[connect->contigID].length,
+ connect->gapLen, flag, connect->weight );
- connect = connect->next;
- }
- }
+ connect = connect->next;
+ }
+ }
- fprintf ( fp, "}\n" );
- fclose ( fp );
+ fprintf ( fp, "}\n" );
+ fclose ( fp );
}
diff --git a/standardPregraph/pregraph.c b/standardPregraph/pregraph.c
index c4151b3..5237dad 100644
--- a/standardPregraph/pregraph.c
+++ b/standardPregraph/pregraph.c
@@ -1,7 +1,7 @@
/*
* pregraph.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -31,7 +31,7 @@ static char shortrdsfile[256]; //the reads config file name ,see -s option
static char graphfile[256]; //the output prefix name ,see -o option
static int cutTips = 1; //whether remove single tips or not. single tips , the tips starting from a kmer which coverage = 1
-static void initenv ( int argc, char ** argv );
+static void initenv ( int argc, char **argv );
static void display_pregraph_usage ();
@@ -59,171 +59,178 @@ Return:
Zero always
*************************************************/
-int call_pregraph ( int argc, char ** argv )
+int call_pregraph ( int argc, char **argv )
{
- time_t start_t, stop_t, time_bef, time_aft;
- time ( &start_t );
- fprintf ( stderr, "\n********************\n" );
- fprintf ( stderr, "Pregraph\n" );
- fprintf ( stderr, "********************\n\n" );
- initenv ( argc, argv );
-
- if ( overlaplen % 2 == 0 )
- {
- overlaplen++;
- fprintf ( stderr, "K should be an odd number.\n" );
- }
-
- if ( overlaplen < 13 )
- {
- overlaplen = 13;
- fprintf ( stderr, "K should not be less than 13.\n" );
- }
+ time_t start_t, stop_t, time_bef, time_aft;
+ time ( &start_t );
+ fprintf ( stderr, "\n********************\n" );
+ fprintf ( stderr, "Pregraph\n" );
+ fprintf ( stderr, "********************\n\n" );
+ initenv ( argc, argv );
+
+ if ( overlaplen % 2 == 0 )
+ {
+ overlaplen++;
+ fprintf ( stderr, "K should be an odd number.\n" );
+ }
+
+ if ( overlaplen < 13 )
+ {
+ overlaplen = 13;
+ fprintf ( stderr, "K should not be less than 13.\n" );
+ }
#ifdef MER127
- else if ( overlaplen > 127 )
- {
- overlaplen = 127;
- fprintf ( stderr, "K should not be greater than 127.\n" );
- }
+ else if ( overlaplen > 127 )
+ {
+ overlaplen = 127;
+ fprintf ( stderr, "K should not be greater than 127.\n" );
+ }
#else
- else if ( overlaplen > 63 )
- {
- overlaplen = 63;
- fprintf ( stderr, "K should not be greater than 63.\n" );
- }
+ else if ( overlaplen > 63 )
+ {
+ overlaplen = 63;
+ fprintf ( stderr, "K should not be greater than 63.\n" );
+ }
#endif
- time ( &time_bef );
- prlRead2HashTable ( shortrdsfile, graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on pre-graph construction: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- // printf ("deLowKmer %d, deLowEdge %d\n", deLowKmer, deLowEdge);
- // fprintf (stderr,"DeLowKmer %d\n", deLowKmer);
-
- //analyzeTips(hash_table, graphfile);
- if ( !deLowKmer && cutTips )
- {
- time ( &time_bef );
- removeSingleTips ();
- removeMinorTips ();
- time ( &time_aft );
- fprintf ( stderr, "Time spent on removing tips: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- }
- else
- {
- time ( &time_bef );
- removeMinorTips ();
- time ( &time_aft );
- fprintf ( stderr, "Time spent on removing tips: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- }
-
- initKmerSetSize = 0;
- //combine each linear part to an edge
- time ( &time_bef );
- kmer2edges ( graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on constructing edges: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- //map read to edge one by one
- time ( &time_bef );
- prlRead2edge ( shortrdsfile, graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on aligning reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
- output_vertex ( graphfile );
- free_Sets ( KmerSets, thrd_num );
- free_Sets ( KmerSetsPatch, thrd_num );
- time ( &stop_t );
- fprintf ( stderr, "Overall time spent on constructing pre-graph: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
- return 0;
+ time ( &time_bef );
+ prlRead2HashTable ( shortrdsfile, graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on pre-graph construction: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ // printf ("deLowKmer %d, deLowEdge %d\n", deLowKmer, deLowEdge);
+ // fprintf (stderr,"DeLowKmer %d\n", deLowKmer);
+
+ //analyzeTips(hash_table, graphfile);
+ if ( !deLowKmer && cutTips )
+ {
+ time ( &time_bef );
+ removeSingleTips ();
+ removeMinorTips ();
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on removing tips: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ }
+ else
+ {
+ time ( &time_bef );
+ removeMinorTips ();
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on removing tips: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ }
+
+ initKmerSetSize = 0;
+ //combine each linear part to an edge
+ time ( &time_bef );
+ kmer2edges ( graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on constructing edges: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ //map read to edge one by one
+ time ( &time_bef );
+ prlRead2edge ( shortrdsfile, graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on aligning reads: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+ output_vertex ( graphfile );
+ free_Sets ( KmerSets, thrd_num );
+ free_Sets ( KmerSetsPatch, thrd_num );
+ time ( &stop_t );
+ fprintf ( stderr, "Overall time spent on constructing pre-graph: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
+ return 0;
}
-void initenv ( int argc, char ** argv )
+void initenv ( int argc, char **argv )
{
- int copt;
- int inpseq, outseq;
- extern char * optarg;
- char temp[100];
- optind = 1;
- inpseq = outseq = 0;
- fprintf ( stderr, "Parameters: pregraph " );
-
- while ( ( copt = getopt ( argc, argv, "a:s:o:K:p:d:R" ) ) != EOF )
- {
- //printf("get option\n");
- switch ( copt )
- {
- case 's':
- fprintf ( stderr, "-s %s ", optarg );
- inpseq = 1;
- sscanf ( optarg, "%s", shortrdsfile );
- break;
- case 'o':
- fprintf ( stderr, "-o %s ", optarg );
- outseq = 1;
- sscanf ( optarg, "%s", graphfile );
- break;
- case 'K':
- fprintf ( stderr, "-K %s ", optarg );
- sscanf ( optarg, "%s", temp );
- overlaplen = atoi ( temp );
- break;
- case 'p':
- fprintf ( stderr, "-p %s ", optarg );
- sscanf ( optarg, "%s", temp );
- thrd_num = atoi ( temp );
- break;
- case 'R':
- repsTie = 1;
- fprintf ( stderr, "-R " );
- break;
- case 'd':
- fprintf ( stderr, "-d %s ", optarg );
- sscanf ( optarg, "%s", temp );
- deLowKmer = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
- break;
- /*
- case 'D':
- deLowEdge = 1;
- break;
- */
- case 'a':
- fprintf ( stderr, "-a %s ", optarg );
- initKmerSetSize = atoi ( optarg );
- break;
- default:
-
- if ( inpseq == 0 || outseq == 0 )
- {
- display_pregraph_usage ();
- exit ( -1 );
- }
- }
- }
-
- fprintf ( stderr, "\n\n" );
-
- if ( inpseq == 0 || outseq == 0 )
- {
- //printf("need more\n");
- display_pregraph_usage ();
- exit ( -1 );
- }
+ int copt;
+ int inpseq, outseq;
+ extern char *optarg;
+ char temp[100];
+ optind = 1;
+ inpseq = outseq = 0;
+ fprintf ( stderr, "Parameters: pregraph " );
+
+ while ( ( copt = getopt ( argc, argv, "a:s:o:K:p:d:R" ) ) != EOF )
+ {
+ //printf("get option\n");
+ switch ( copt )
+ {
+ case 's':
+ fprintf ( stderr, "-s %s ", optarg );
+ inpseq = 1;
+ sscanf ( optarg, "%s", shortrdsfile );
+ break;
+
+ case 'o':
+ fprintf ( stderr, "-o %s ", optarg );
+ outseq = 1;
+ sscanf ( optarg, "%s", graphfile );
+ break;
+
+ case 'K':
+ fprintf ( stderr, "-K %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ overlaplen = atoi ( temp );
+ break;
+
+ case 'p':
+ fprintf ( stderr, "-p %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ thrd_num = atoi ( temp );
+ break;
+
+ case 'R':
+ repsTie = 1;
+ fprintf ( stderr, "-R " );
+ break;
+
+ case 'd':
+ fprintf ( stderr, "-d %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ deLowKmer = atoi ( temp ) >= 0 ? atoi ( temp ) : 0;
+ break;
+
+ /*
+ case 'D':
+ deLowEdge = 1;
+ break;
+ */
+ case 'a':
+ fprintf ( stderr, "-a %s ", optarg );
+ initKmerSetSize = atoi ( optarg );
+ break;
+
+ default:
+
+ if ( inpseq == 0 || outseq == 0 )
+ {
+ display_pregraph_usage ();
+ exit ( -1 );
+ }
+ }
+ }
+
+ fprintf ( stderr, "\n\n" );
+
+ if ( inpseq == 0 || outseq == 0 )
+ {
+ //printf("need more\n");
+ display_pregraph_usage ();
+ exit ( -1 );
+ }
}
static void display_pregraph_usage ()
{
- fprintf ( stderr, "\npregraph -s configFile -o outputGraph [-R] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutoff]\n" );
- fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
- fprintf ( stderr, " -o <string> outputGraph: prefix of output graph file name\n" );
+ fprintf ( stderr, "\npregraph -s configFile -o outputGraph [-R] [-K kmer -p n_cpu -a initMemoryAssumption -d KmerFreqCutoff]\n" );
+ fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
+ fprintf ( stderr, " -o <string> outputGraph: prefix of output graph file name\n" );
#ifdef MER127
- fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 127): kmer size, [23]\n" );
#else
- fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
+ fprintf ( stderr, " -K <int> kmer(min 13, max 63): kmer size, [23]\n" );
#endif
- fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
- fprintf ( stderr, " -a <int> initMemoryAssumption: memory assumption initialized to avoid further reallocation, unit GB, [0]\n" );
- fprintf ( stderr, " -R (optional) output extra information for resolving repeats in contig step, [NO]\n" );
- fprintf ( stderr, " -d <int> KmerFreqCutoff: kmers with frequency no larger than KmerFreqCutoff will be deleted, [0]\n" );
+ fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
+ fprintf ( stderr, " -a <int> initMemoryAssumption: memory assumption initialized to avoid further reallocation, unit GB, [0]\n" );
+ fprintf ( stderr, " -R (optional) output extra information for resolving repeats in contig step, [NO]\n" );
+ fprintf ( stderr, " -d <int> KmerFreqCutoff: kmers with frequency no larger than KmerFreqCutoff will be deleted, [0]\n" );
}
diff --git a/standardPregraph/prlHashCtg.c b/standardPregraph/prlHashCtg.c
index f14c7c7..f59d7a7 100644
--- a/standardPregraph/prlHashCtg.c
+++ b/standardPregraph/prlHashCtg.c
@@ -1,7 +1,7 @@
/*
* prlHashCtg.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,16 +27,16 @@
#include "extvab.h"
//debugging variables
-static long long * kmerCounter;
+static long long *kmerCounter;
//buffer related varibles for chop kmer
static unsigned int read_c;
-static char ** rcSeq;
-static char * seqBuffer;
-static int * lenBuffer;
-static unsigned int * indexArray;
-static unsigned int * seqBreakers; //record sum length to indicate start pos on seqBuffer
-static int * ctgIdArray;
+static char **rcSeq;
+static char *seqBuffer;
+static int *lenBuffer;
+static unsigned int *indexArray;
+static unsigned int *seqBreakers; //record sum length to indicate start pos on seqBuffer
+static int *ctgIdArray;
//static Kmer *firstKmers;
@@ -45,70 +45,70 @@ static unsigned int buffer_size = 100000000;
static unsigned int seq_buffer_size;
static unsigned int max_read_c;
static volatile unsigned int kmer_c;
-static Kmer * kmerBuffer;
-static ubyte8 * hashBanBuffer;
-static boolean * smallerBuffer;
+static Kmer *kmerBuffer;
+static ubyte8 *hashBanBuffer;
+static boolean *smallerBuffer;
-static void singleKmer ( int t, KmerSet * kset, unsigned int seq_index, unsigned int pos );
+static void singleKmer ( int t, KmerSet *kset, unsigned int seq_index, unsigned int pos );
static void chopKmer4read ( int t, int threadID );
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- unsigned int i;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- unsigned int seq_index = 0;
- unsigned int pos = 0;
-
- for ( i = 0; i < kmer_c; i++ )
- {
- if ( seq_index < read_c && indexArray[seq_index + 1] == i )
- {
- seq_index++; // which sequence this kmer belongs to
- pos = 0;
- }
-
- if ( ( unsigned char ) ( hashBanBuffer[i] % thrd_num ) != id )
- {
- pos++;
- continue;
- }
-
- kmerCounter[id + 1]++;
- singleKmer ( i, KmerSets[id], seq_index, pos++ );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 2 )
- {
- for ( i = 0; i < read_c; i++ )
- {
- if ( i % thrd_num != id )
- {
- continue;
- }
-
- chopKmer4read ( i, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 3 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ unsigned int i;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ unsigned int seq_index = 0;
+ unsigned int pos = 0;
+
+ for ( i = 0; i < kmer_c; i++ )
+ {
+ if ( seq_index < read_c && indexArray[seq_index + 1] == i )
+ {
+ seq_index++; // which sequence this kmer belongs to
+ pos = 0;
+ }
+
+ if ( ( unsigned char ) ( hashBanBuffer[i] % thrd_num ) != id )
+ {
+ pos++;
+ continue;
+ }
+
+ kmerCounter[id + 1]++;
+ singleKmer ( i, KmerSets[id], seq_index, pos++ );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 2 )
+ {
+ for ( i = 0; i < read_c; i++ )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ chopKmer4read ( i, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 3 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+
+ usleep ( 1 );
+ }
}
/*************************************************
@@ -126,158 +126,158 @@ Output:
Return:
None.
*************************************************/
-static void singleKmer ( int t, KmerSet * kset, unsigned int seq_index, unsigned int pos )
+static void singleKmer ( int t, KmerSet *kset, unsigned int seq_index, unsigned int pos )
{
- boolean flag;
- kmer_t * node;
- flag = put_kmerset ( kset, kmerBuffer[t], 4, 4, &node );
-
- if ( !flag )
- {
- if ( smallerBuffer[t] )
- {
- node->twin = 0;
- }
- else
- {
- node->twin = 1;
- };
-
- node->l_links = ctgIdArray[seq_index];
-
- node->r_links = pos;
- }
- else
- {
- node->deleted = 1;
- }
+ boolean flag;
+ kmer_t *node;
+ flag = put_kmerset ( kset, kmerBuffer[t], 4, 4, &node );
+
+ if ( !flag )
+ {
+ if ( smallerBuffer[t] )
+ {
+ node->twin = 0;
+ }
+ else
+ {
+ node->twin = 1;
+ };
+
+ node->l_links = ctgIdArray[seq_index];
+
+ node->r_links = pos;
+ }
+ else
+ {
+ node->deleted = 1;
+ }
}
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
static void chopKmer4read ( int t, int threadID )
{
- char * src_seq = seqBuffer + seqBreakers[t];
- char * bal_seq = rcSeq[threadID];
- int len_seq = lenBuffer[t];
- int j, bal_j;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- int index;
+ char *src_seq = seqBuffer + seqBreakers[t];
+ char *bal_seq = rcSeq[threadID];
+ int len_seq = lenBuffer[t];
+ int j, bal_j;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ int index;
#ifdef MER127
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word.high = word.low = 0;
+ word.high = word.low = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlaplen );
- bal_j = len_seq - 0 - overlaplen;
- index = indexArray[t];
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- hashBanBuffer[index] = hash_ban;
- smallerBuffer[index++] = 1;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- hashBanBuffer[index] = bal_hash_ban;
- smallerBuffer[index++] = 0;
- }
-
- for ( j = 1; j <= len_seq - overlaplen; j++ )
- {
- word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
- bal_j = len_seq - j - overlaplen;
- bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- hashBanBuffer[index] = hash_ban;
- smallerBuffer[index++] = 1;
- }
- else
- {
- // complementary node
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- hashBanBuffer[index] = bal_hash_ban;
- smallerBuffer[index++] = 0;
- }
- }
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlaplen );
+ bal_j = len_seq - 0 - overlaplen;
+ index = indexArray[t];
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ hashBanBuffer[index] = hash_ban;
+ smallerBuffer[index++] = 1;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ hashBanBuffer[index] = bal_hash_ban;
+ smallerBuffer[index++] = 0;
+ }
+
+ for ( j = 1; j <= len_seq - overlaplen; j++ )
+ {
+ word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
+ bal_j = len_seq - j - overlaplen;
+ bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ hashBanBuffer[index] = hash_ban;
+ smallerBuffer[index++] = 1;
+ }
+ else
+ {
+ // complementary node
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ hashBanBuffer[index] = bal_hash_ban;
+ smallerBuffer[index++] = 0;
+ }
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
@@ -293,16 +293,16 @@ Output:
Return:
None.
*************************************************/
-static int getID ( char * name )
+static int getID ( char *name )
{
- if ( name[0] >= '0' && name[0] <= '9' )
- {
- return atoi ( & ( name[0] ) );
- }
- else
- {
- return 0;
- }
+ if ( name[0] >= '0' && name[0] <= '9' )
+ {
+ return atoi ( & ( name[0] ) );
+ }
+ else
+ {
+ return 0;
+ }
}
@@ -321,139 +321,139 @@ Output:
Return:
True always.
*************************************************/
-boolean prlContig2nodes ( char * grapfile, int len_cut )
+boolean prlContig2nodes ( char *grapfile, int len_cut )
{
- long long i, num_seq;
- char name[256], *next_name;
- FILE * fp;
- pthread_t threads[thrd_num];
- time_t start_t, stop_t;
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- int maxCtgLen, minCtgLen, nameLen;
- unsigned int lenSum, contigId;
- //init
- WORDFILTER = createFilter ( overlaplen );
- time ( &start_t );
- sprintf ( name, "%s.contig", grapfile );
- fp = ckopen ( name, "r" );
- maxCtgLen = nameLen = 10;
- minCtgLen = 1000;
- num_seq = readseqpar ( &maxCtgLen, &minCtgLen, &nameLen, fp );
- fprintf ( stderr, "\n%lld contig(s), maximum sequence length %d, minimum sequence length %d, maximum name length %d.\n", num_seq, maxCtgLen, minCtgLen, nameLen );
- maxReadLen = maxCtgLen;
- fclose ( fp );
- time ( &stop_t );
- fprintf ( stderr, "Time spent on parsing contigs file: %ds.\n", ( int ) ( stop_t - start_t ) );
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- // extract all the EDONs
- seq_buffer_size = buffer_size * 2;
- max_read_c = seq_buffer_size / 20;
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- seqBuffer = ( char * ) ckalloc ( seq_buffer_size * sizeof ( char ) );
- lenBuffer = ( int * ) ckalloc ( max_read_c * sizeof ( int ) );
- indexArray = ( unsigned int * ) ckalloc ( ( max_read_c + 1 ) * sizeof ( unsigned int ) );
- seqBreakers = ( unsigned int * ) ckalloc ( ( max_read_c + 1 ) * sizeof ( unsigned int ) );
- ctgIdArray = ( int * ) ckalloc ( max_read_c * sizeof ( int ) );
- fp = ckopen ( name, "r" );
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
-
- if ( 1 )
- {
- kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
- KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- KmerSets[i] = init_kmerset ( 1024, 0.77f );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- kmerCounter[i + 1] = 0;
- rcSeq[i + 1] = ( char * ) ckalloc ( maxCtgLen * sizeof ( char ) );
- }
-
- creatThrds ( threads, paras );
- }
-
- kmer_c = thrdSignal[0] = kmerCounter[0] = 0;
- time ( &start_t );
- read_c = lenSum = i = seqBreakers[0] = indexArray[0] = 0;
- readseq1by1 ( seqBuffer + seqBreakers[read_c], next_name, & ( lenBuffer[read_c] ), fp, -1 );
-
- while ( !feof ( fp ) )
- {
- contigId = getID ( next_name );
- readseq1by1 ( seqBuffer + seqBreakers[read_c], next_name, & ( lenBuffer[read_c] ), fp, 1 );
-
- if ( ( ++i ) % 10000000 == 0 )
- {
- fprintf ( stderr, "--- %lldth contig(s).\n", i );
- }
-
- if ( lenBuffer[read_c] < overlaplen + 1 || lenBuffer[read_c] < len_cut )
- {
- contigId = getID ( next_name );
- continue;
- }
-
- ctgIdArray[read_c] = contigId > 0 ? contigId : i;
- lenSum += lenBuffer[read_c];
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
- seqBreakers[read_c] = lenSum;
- indexArray[read_c] = kmer_c;
-
- if ( read_c == max_read_c || ( lenSum + maxCtgLen ) > seq_buffer_size || ( kmer_c + maxCtgLen - overlaplen + 1 ) > buffer_size )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- kmer_c = read_c = lenSum = 0;
- }
- }
-
- if ( read_c )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- }
-
- sendWorkSignal ( 3, thrdSignal ); //stop threads
- thread_wait ( threads );
- time ( &stop_t );
- fprintf ( stderr, "Time spent on hashing contigs: %ds.\n", ( int ) ( stop_t - start_t ) );
-
- if ( 1 )
- {
- unsigned long long alloCounter = 0;
- unsigned long long allKmerCounter = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- alloCounter += count_kmerset ( ( KmerSets[i] ) );
- allKmerCounter += kmerCounter[i + 1];
- free ( ( void * ) rcSeq[i + 1] );
- }
-
- fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in contigs, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
- }
-
- free ( ( void * ) rcSeq );
- free ( ( void * ) kmerCounter );
- free ( ( void * ) seqBuffer );
- free ( ( void * ) lenBuffer );
- free ( ( void * ) indexArray );
- free ( ( void * ) seqBreakers );
- free ( ( void * ) ctgIdArray );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) hashBanBuffer );
- free ( ( void * ) smallerBuffer );
- free ( ( void * ) next_name );
- fclose ( fp );
- return 1;
+ long long i, num_seq;
+ char name[256], *next_name;
+ FILE *fp;
+ pthread_t threads[thrd_num];
+ time_t start_t, stop_t;
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ int maxCtgLen, minCtgLen, nameLen;
+ unsigned int lenSum, contigId;
+ //init
+ WORDFILTER = createFilter ( overlaplen );
+ time ( &start_t );
+ sprintf ( name, "%s.contig", grapfile );
+ fp = ckopen ( name, "r" );
+ maxCtgLen = nameLen = 10;
+ minCtgLen = 1000;
+ num_seq = readseqpar ( &maxCtgLen, &minCtgLen, &nameLen, fp );
+ fprintf ( stderr, "\n%lld contig(s), maximum sequence length %d, minimum sequence length %d, maximum name length %d.\n", num_seq, maxCtgLen, minCtgLen, nameLen );
+ maxReadLen = maxCtgLen;
+ fclose ( fp );
+ time ( &stop_t );
+ fprintf ( stderr, "Time spent on parsing contigs file: %ds.\n", ( int ) ( stop_t - start_t ) );
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ // extract all the EDONs
+ seq_buffer_size = buffer_size * 2;
+ max_read_c = seq_buffer_size / 20;
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ seqBuffer = ( char * ) ckalloc ( seq_buffer_size * sizeof ( char ) );
+ lenBuffer = ( int * ) ckalloc ( max_read_c * sizeof ( int ) );
+ indexArray = ( unsigned int * ) ckalloc ( ( max_read_c + 1 ) * sizeof ( unsigned int ) );
+ seqBreakers = ( unsigned int * ) ckalloc ( ( max_read_c + 1 ) * sizeof ( unsigned int ) );
+ ctgIdArray = ( int * ) ckalloc ( max_read_c * sizeof ( int ) );
+ fp = ckopen ( name, "r" );
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+
+ if ( 1 )
+ {
+ kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
+ KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ KmerSets[i] = init_kmerset ( 1024, 0.77f );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ kmerCounter[i + 1] = 0;
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxCtgLen * sizeof ( char ) );
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ kmer_c = thrdSignal[0] = kmerCounter[0] = 0;
+ time ( &start_t );
+ read_c = lenSum = i = seqBreakers[0] = indexArray[0] = 0;
+ readseq1by1 ( seqBuffer + seqBreakers[read_c], next_name, & ( lenBuffer[read_c] ), fp, -1 );
+
+ while ( !feof ( fp ) )
+ {
+ contigId = getID ( next_name );
+ readseq1by1 ( seqBuffer + seqBreakers[read_c], next_name, & ( lenBuffer[read_c] ), fp, 1 );
+
+ if ( ( ++i ) % 10000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth contig(s).\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 || lenBuffer[read_c] < len_cut )
+ {
+ contigId = getID ( next_name );
+ continue;
+ }
+
+ ctgIdArray[read_c] = contigId > 0 ? contigId : i;
+ lenSum += lenBuffer[read_c];
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+ seqBreakers[read_c] = lenSum;
+ indexArray[read_c] = kmer_c;
+
+ if ( read_c == max_read_c || ( lenSum + maxCtgLen ) > seq_buffer_size || ( kmer_c + maxCtgLen - overlaplen + 1 ) > buffer_size )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ kmer_c = read_c = lenSum = 0;
+ }
+ }
+
+ if ( read_c )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ }
+
+ sendWorkSignal ( 3, thrdSignal ); //stop threads
+ thread_wait ( threads );
+ time ( &stop_t );
+ fprintf ( stderr, "Time spent on hashing contigs: %ds.\n", ( int ) ( stop_t - start_t ) );
+
+ if ( 1 )
+ {
+ unsigned long long alloCounter = 0;
+ unsigned long long allKmerCounter = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ alloCounter += count_kmerset ( ( KmerSets[i] ) );
+ allKmerCounter += kmerCounter[i + 1];
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+
+ fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in contigs, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
+ }
+
+ free ( ( void * ) rcSeq );
+ free ( ( void * ) kmerCounter );
+ free ( ( void * ) seqBuffer );
+ free ( ( void * ) lenBuffer );
+ free ( ( void * ) indexArray );
+ free ( ( void * ) seqBreakers );
+ free ( ( void * ) ctgIdArray );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) hashBanBuffer );
+ free ( ( void * ) smallerBuffer );
+ free ( ( void * ) next_name );
+ fclose ( fp );
+ return 1;
}
diff --git a/standardPregraph/prlHashReads.c b/standardPregraph/prlHashReads.c
index 6a9158e..4705bcf 100644
--- a/standardPregraph/prlHashReads.c
+++ b/standardPregraph/prlHashReads.c
@@ -1,7 +1,7 @@
/*
* prlHashReads.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -27,261 +27,261 @@
#include "extvab.h"
//debugging variables
-static long long * tips;
-static long long * kmerCounter; //kmer number for each KmerSet[thread id]
+static long long *tips;
+static long long *kmerCounter; //kmer number for each KmerSet[thread id]
-static long long ** kmerFreq;
+static long long **kmerFreq;
//buffer related varibles for chop kmer
static int read_c;
-static char ** rcSeq; //sequence pointer for each thread
-static char ** seqBuffer; //read buffer
-static int * lenBuffer; //read length buffer
-static int * indexArray; // the read's begin kmer 's kmer_c
+static char **rcSeq; //sequence pointer for each thread
+static char **seqBuffer; //read buffer
+static int *lenBuffer; //read length buffer
+static int *indexArray; // the read's begin kmer 's kmer_c
//buffer related varibles for splay tree
static int buffer_size = 100000000; //buffer size for kmerBuffer...
static volatile int kmer_c; // kmer count number
-static Kmer * kmerBuffer; //kmer buffer array
-static ubyte8 * hashBanBuffer; //the buffered hash value for 'kmerBuffer'
-static char * nextcBuffer, *prevcBuffer; //next char buffer , previous char buffer for 'kmerBuffer'
+static Kmer *kmerBuffer; //kmer buffer array
+static ubyte8 *hashBanBuffer; //the buffered hash value for 'kmerBuffer'
+static char *nextcBuffer, *prevcBuffer; //next char buffer , previous char buffer for 'kmerBuffer'
static struct aiocb aio1;
static struct aiocb aio2;
-static char * aioBuffer1;
-static char * aioBuffer2;
-static char * readBuffer1;
-static char * readBuffer2;
+static char *aioBuffer1;
+static char *aioBuffer2;
+static char *readBuffer1;
+static char *readBuffer2;
-static void thread_mark ( KmerSet * set, unsigned char thrdID );
-static void Mark1in1outNode ( unsigned char * thrdSignal );
-static void thread_delow ( KmerSet * set, unsigned char thrdID );
-static void deLowCov ( unsigned char * thrdSignal );
+static void thread_mark ( KmerSet *set, unsigned char thrdID );
+static void Mark1in1outNode ( unsigned char *thrdSignal );
+static void thread_delow ( KmerSet *set, unsigned char thrdID );
+static void deLowCov ( unsigned char *thrdSignal );
-static void singleKmer ( int t, KmerSet * kset );
+static void singleKmer ( int t, KmerSet *kset );
static void chopKmer4read ( int t, int threadID );
-static void freqStat ( char * outfile );
+static void freqStat ( char *outfile );
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- for ( i = 0; i < kmer_c; i++ )
- {
- //if((unsigned char)(magic_seq(hashBanBuffer[i])%thrd_num)!=id)
- //if((kmerBuffer[i]%thrd_num)!=id)
- if ( ( hashBanBuffer[i] % thrd_num ) != id )
- {
- continue;
- }
-
- kmerCounter[id + 1]++;
- singleKmer ( i, KmerSets[id] );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 2 )
- {
- for ( i = 0; i < read_c; i++ )
- {
- if ( i % thrd_num != id )
- {
- continue;
- }
-
- chopKmer4read ( i, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 3 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
- else if ( * ( prm->selfSignal ) == 4 )
- {
- thread_mark ( KmerSets[id], id );
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 5 )
- {
- thread_delow ( KmerSets[id], id );
- * ( prm->selfSignal ) = 0;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ int i;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ for ( i = 0; i < kmer_c; i++ )
+ {
+ //if((unsigned char)(magic_seq(hashBanBuffer[i])%thrd_num)!=id)
+ //if((kmerBuffer[i]%thrd_num)!=id)
+ if ( ( hashBanBuffer[i] % thrd_num ) != id )
+ {
+ continue;
+ }
+
+ kmerCounter[id + 1]++;
+ singleKmer ( i, KmerSets[id] );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 2 )
+ {
+ for ( i = 0; i < read_c; i++ )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ chopKmer4read ( i, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 3 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+ else if ( * ( prm->selfSignal ) == 4 )
+ {
+ thread_mark ( KmerSets[id], id );
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 5 )
+ {
+ thread_delow ( KmerSets[id], id );
+ * ( prm->selfSignal ) = 0;
+ }
+
+ usleep ( 1 );
+ }
}
-static void singleKmer ( int t, KmerSet * kset )
+static void singleKmer ( int t, KmerSet *kset )
{
- kmer_t * pos;
- put_kmerset ( kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &pos );
+ kmer_t *pos;
+ put_kmerset ( kset, kmerBuffer[t], prevcBuffer[t], nextcBuffer[t], &pos );
}
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
static void chopKmer4read ( int t, int threadID )
{
- char * src_seq = seqBuffer[t];
- char * bal_seq = rcSeq[threadID];
- int len_seq = lenBuffer[t];
- int j, bal_j;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- int index;
- char InvalidCh = 4;
+ char *src_seq = seqBuffer[t];
+ char *bal_seq = rcSeq[threadID];
+ int len_seq = lenBuffer[t];
+ int j, bal_j;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ int index;
+ char InvalidCh = 4;
#ifdef MER127
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word.high = word.low = 0;
+ word.high = word.low = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlaplen );
- bal_j = len_seq - overlaplen;
- index = indexArray[t];
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[index] = hash_ban;
- kmerBuffer[index] = word;
- prevcBuffer[index] = InvalidCh;
- nextcBuffer[index++] = src_seq[0 + overlaplen];
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[index] = bal_hash_ban;
- kmerBuffer[index] = bal_word;
- prevcBuffer[index] = bal_seq[bal_j - 1];
- nextcBuffer[index++] = InvalidCh;
- }
-
- for ( j = 1; j <= len_seq - overlaplen; j++ )
- {
- word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
- bal_j = len_seq - j - overlaplen;
- bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- hashBanBuffer[index] = hash_ban;
- kmerBuffer[index] = word;
- prevcBuffer[index] = src_seq[j - 1];
-
- if ( j < len_seq - overlaplen )
- {
- nextcBuffer[index++] = src_seq[j + overlaplen];
- }
- else
- {
- nextcBuffer[index++] = InvalidCh;
- }
-
- //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
- }
- else
- {
- // complementary node
- bal_hash_ban = hash_kmer ( bal_word );
- hashBanBuffer[index] = bal_hash_ban;
- kmerBuffer[index] = bal_word;
-
- if ( bal_j > 0 )
- {
- prevcBuffer[index] = bal_seq[bal_j - 1];
- }
- else
- {
- prevcBuffer[index] = InvalidCh;
- }
-
- nextcBuffer[index++] = bal_seq[bal_j + overlaplen];
- //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
- }
- }
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlaplen );
+ bal_j = len_seq - overlaplen;
+ index = indexArray[t];
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[index] = hash_ban;
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = InvalidCh;
+ nextcBuffer[index++] = src_seq[0 + overlaplen];
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[index] = bal_hash_ban;
+ kmerBuffer[index] = bal_word;
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ nextcBuffer[index++] = InvalidCh;
+ }
+
+ for ( j = 1; j <= len_seq - overlaplen; j++ )
+ {
+ word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
+ bal_j = len_seq - j - overlaplen;
+ bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ hashBanBuffer[index] = hash_ban;
+ kmerBuffer[index] = word;
+ prevcBuffer[index] = src_seq[j - 1];
+
+ if ( j < len_seq - overlaplen )
+ {
+ nextcBuffer[index++] = src_seq[j + overlaplen];
+ }
+ else
+ {
+ nextcBuffer[index++] = InvalidCh;
+ }
+
+ //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
+ }
+ else
+ {
+ // complementary node
+ bal_hash_ban = hash_kmer ( bal_word );
+ hashBanBuffer[index] = bal_hash_ban;
+ kmerBuffer[index] = bal_word;
+
+ if ( bal_j > 0 )
+ {
+ prevcBuffer[index] = bal_seq[bal_j - 1];
+ }
+ else
+ {
+ prevcBuffer[index] = InvalidCh;
+ }
+
+ nextcBuffer[index++] = bal_seq[bal_j + overlaplen];
+ //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
+ }
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
/*************************************************
@@ -301,745 +301,804 @@ Output:
Return:
1 if exits normally.
*************************************************/
-boolean prlRead2HashTable ( char * libfile, char * outfile )
+boolean prlRead2HashTable ( char *libfile, char *outfile )
{
- char * cach1;
- char * cach2;
- unsigned char asm_ctg = 1;
- long long i;
- char * next_name, name[256];
- FILE * fo;
- time_t start_t, stop_t;
- int maxReadNum;
- int libNo;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- boolean flag, pairs = 0;
- WORDFILTER = createFilter ( overlaplen );
- maxReadLen = 0;
- maxNameLen = 256;
- scan_libInfo ( libfile );
- alloc_pe_mem ( num_libs );
-
- if ( !maxReadLen )
- {
- maxReadLen = 100;
- }
-
- if ( gLineLen < maxReadLen )
- {
- gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
- }
-
- //init
- maxReadLen4all = maxReadLen;
- fprintf ( stderr, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile, num_libs, maxReadLen, maxNameLen );
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
- prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- //printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
- int maxAIOSize = 32768;
- aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
- aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
- readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
- readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024
- cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
- cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
- memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
- memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
-
- if ( 1 )
- {
- kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
- KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
- ubyte8 init_size = 1024;
- ubyte8 k = 0;
-
- if ( initKmerSetSize )
- {
+ char *cach1;
+ char *cach2;
+ unsigned char asm_ctg = 1;
+ long long i;
+ char *next_name, name[256];
+ FILE *fo;
+ time_t start_t, stop_t;
+ int maxReadNum;
+ int libNo;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ boolean flag, pairs = 0;
+ WORDFILTER = createFilter ( overlaplen );
+ maxReadLen = 0;
+ maxNameLen = 256;
+ scan_libInfo ( libfile );
+ alloc_pe_mem ( num_libs );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ if ( gLineLen < maxReadLen )
+ {
+ gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
+ }
+
+ //init
+ maxReadLen4all = maxReadLen;
+ fprintf ( stderr, "In %s, %d lib(s), maximum read length %d, maximum name length %d.\n\n", libfile, num_libs, maxReadLen, maxNameLen );
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
+ prevcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ nextcBuffer = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
+ //printf("buffer size %d, max read len %d, max read num %d\n",buffer_size,maxReadLen,maxReadNum);
+ int maxAIOSize = 32768;
+ aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
+ aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
+ readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
+ readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024
+ cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
+ cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
+ memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
+ memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+
+ if ( 1 )
+ {
+ kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
+ KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
+ ubyte8 init_size = 1024;
+ ubyte8 k = 0;
+
+ if ( initKmerSetSize )
+ {
#ifdef MER127
- init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 40 );
+ init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 40 );
#else
- init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 24 ); //is it true?
+ init_size = ( ubyte8 ) ( ( double ) initKmerSetSize * 1024.0f * 1024.0f * 1024.0f / ( double ) thrd_num / 24 ); //is it true?
#endif
- do
- {
- ++k;
- }
- while ( k * 0xFFFFFFLLU < init_size );
- }
-
- for ( i = 0; i < thrd_num; i++ )
- {
- //KmerSets[i] = init_kmerset(1024,0.77f);
- KmerSets[i] = init_kmerset ( ( ( initKmerSetSize ) ? ( k * 0xFFFFFFLLU ) : ( init_size ) ), 0.77f );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- kmerCounter[i + 1] = 0;
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- creatThrds ( threads, paras );
- }
-
- thrdSignal[0] = kmerCounter[0] = 0;
- time ( &start_t );
- kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
-
- while ( openNextFile ( &libNo, pairs, asm_ctg ) )
- {
- //read bam file
- if ( lib_array[libNo].curr_type == 4 )
- {
- int type = 0; //deside the PE reads is good or bad
-
- while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
- {
- if ( type == -1 ) //if the reads is bad, go back.
- {
- i--;
-
- if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
- {
- kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
- read_c--;
- }
-
- n_solexa -= 2;
- continue;
- }
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < 0 )
- { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- { continue; }
-
- /*
- if(lenBuffer[read_c]>70)
- lenBuffer[read_c] = 50;
- else if(lenBuffer[read_c]>40)
- lenBuffer[read_c] = 40;
- */
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- kmer_c = read_c = 0;
- }
- }
- }
- //read PE fasta or fastq
- else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
- {
- initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
- initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
- int offset1, offset2, flag1, flag2, rt1, rt2;
- offset1 = offset2 = 0;
- rt1 = aio_read ( &aio1 );
- rt2 = aio_read ( &aio2 );
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
-
- if ( flag1 && flag2 )
- {
- int start1, start2, turn;
- start1 = start2 = 0;
- turn = 1;
-
- while ( start1 < offset1 || start2 < offset2 )
- {
- if ( turn == 1 )
- {
- turn = 2;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < 0 )
- { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- {
- if ( start1 >= offset1 )
- {
- start1 = 0;
- offset1 = 0;
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- }
-
- continue;
- }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( start1 >= offset1 )
- {
- start1 = 0;
- offset1 = 0;
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- }
-
- if ( read_c == maxReadNum )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- kmer_c = read_c = 0;
- }
-
- continue;
- }
-
- if ( turn == 2 )
- {
- turn = 1;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < 0 )
- { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- {
- if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
- { break; }
-
- if ( start2 >= offset2 )
- {
- start2 = 0;
- offset2 = 0;
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
- }
-
- continue;
- }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
- { break; }
-
- if ( start2 >= offset2 )
- {
- start2 = 0;
- offset2 = 0;
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
- }
-
- if ( read_c == maxReadNum )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- kmer_c = read_c = 0;
- }
-
- continue;
- }
- }
- }
- else
- {
- fprintf(stderr, "Error: aio_read error.\n");
- }
- }
- //read single fasta, single fastq and PE fasta in one file
- else
- {
- initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
- int offset, flag1, rt;
- offset = 0;
- rt = aio_read ( &aio1 );
-
- while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
- {
- int start = 0;
-
- while ( start < offset )
- {
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < 0 )
- { fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- { continue; }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
- }
-
- if ( read_c > maxReadNum - 1024 )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- kmer_c = read_c = 0;
- }
-
- if ( flag1 == 2 )
- { break; }
- }
- }
- }
-
- if ( read_c )
- {
- kmerCounter[0] += kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //singleKmer
- }
-
- time ( &stop_t );
- fprintf ( stderr, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t - start_t ), i );
-
- //record insert size info
- if ( pairs )
- {
- if ( gradsCounter )
- { fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound ); }
- else
- {
- fprintf ( stderr, "No paired reads found.\n" );
- }
-
- sprintf ( name, "%s.peGrads", outfile );
- fo = ckopen ( name, "w" );
- fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank );
- }
-
- fclose ( fo );
- }
-
- free_pe_mem ();
- free_libs ();
-
- if ( 1 )
- {
- unsigned long long alloCounter = 0;
- unsigned long long allKmerCounter = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- alloCounter += count_kmerset ( ( KmerSets[i] ) );
- allKmerCounter += kmerCounter[i + 1];
- free ( ( void * ) rcSeq[i + 1] );
- }
-
- fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in reads, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
- }
-
- free ( ( void * ) rcSeq );
- free ( ( void * ) kmerCounter );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- free ( ( void * ) seqBuffer[i] );
- }
-
- free ( ( void * ) seqBuffer );
- free ( ( void * ) lenBuffer );
- free ( ( void * ) indexArray );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) hashBanBuffer );
- free ( ( void * ) nextcBuffer );
- free ( ( void * ) prevcBuffer );
- free ( ( void * ) next_name );
- free ( ( void * ) aioBuffer1 );
- free ( ( void * ) aioBuffer2 );
- free ( ( void * ) readBuffer1 );
- free ( ( void * ) readBuffer2 );
- free ( ( void * ) cach1 );
- free ( ( void * ) cach2 );
- fprintf ( stderr, "done hashing nodes\n" );
-
- if ( deLowKmer )
- {
- time ( &start_t );
- deLowCov ( thrdSignal );
- time ( &stop_t );
- fprintf ( stderr, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t - start_t ) );
- }
-
- time ( &start_t );
- Mark1in1outNode ( thrdSignal );
- freqStat ( outfile );
- time ( &stop_t );
- fprintf ( stderr, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t - start_t ) );
- sendWorkSignal ( 3, thrdSignal ); //exit
- thread_wait ( threads );
- return 1;
+ do
+ {
+ ++k;
+ }
+ while ( k * 0xFFFFFFLLU < init_size );
+ }
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ //KmerSets[i] = init_kmerset(1024,0.77f);
+ KmerSets[i] = init_kmerset ( ( ( initKmerSetSize ) ? ( k * 0xFFFFFFLLU ) : ( init_size ) ), 0.77f );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ kmerCounter[i + 1] = 0;
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ thrdSignal[0] = kmerCounter[0] = 0;
+ time ( &start_t );
+ kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
+
+ while ( openNextFile ( &libNo, pairs, asm_ctg ) )
+ {
+ //read bam file
+ if ( lib_array[libNo].curr_type == 4 )
+ {
+ int type = 0; //deside the PE reads is good or bad
+
+ while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
+ {
+ if ( type == -1 ) //if the reads is bad, go back.
+ {
+ i--;
+
+ if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
+ {
+ kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
+ read_c--;
+ }
+
+ n_solexa -= 2;
+ continue;
+ }
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < 0 )
+ {
+ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ continue;
+ }
+
+ /*
+ if(lenBuffer[read_c]>70)
+ lenBuffer[read_c] = 50;
+ else if(lenBuffer[read_c]>40)
+ lenBuffer[read_c] = 40;
+ */
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( read_c == maxReadNum )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ kmer_c = read_c = 0;
+ }
+ }
+ }
+ //read PE fasta or fastq
+ else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
+ {
+ initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
+ initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
+ int offset1, offset2, flag1, flag2, rt1, rt2;
+ offset1 = offset2 = 0;
+ rt1 = aio_read ( &aio1 );
+ rt2 = aio_read ( &aio2 );
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+
+ if ( flag1 && flag2 )
+ {
+ int start1, start2, turn;
+ start1 = start2 = 0;
+ turn = 1;
+
+ while ( start1 < offset1 || start2 < offset2 )
+ {
+ if ( turn == 1 )
+ {
+ turn = 2;
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < 0 )
+ {
+ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ if ( start1 >= offset1 )
+ {
+ start1 = 0;
+ offset1 = 0;
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ }
+
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( start1 >= offset1 )
+ {
+ start1 = 0;
+ offset1 = 0;
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ }
+
+ if ( read_c == maxReadNum )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ kmer_c = read_c = 0;
+ }
+
+ continue;
+ }
+
+ if ( turn == 2 )
+ {
+ turn = 1;
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < 0 )
+ {
+ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
+ {
+ break;
+ }
+
+ if ( start2 >= offset2 )
+ {
+ start2 = 0;
+ offset2 = 0;
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+ }
+
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
+ {
+ break;
+ }
+
+ if ( start2 >= offset2 )
+ {
+ start2 = 0;
+ offset2 = 0;
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+ }
+
+ if ( read_c == maxReadNum )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ kmer_c = read_c = 0;
+ }
+
+ continue;
+ }
+ }
+ }
+ else
+ {
+ fprintf(stderr, "Error: aio_read error.\n");
+ }
+ }
+ //read single fasta, single fastq and PE fasta in one file
+ else
+ {
+ initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
+ int offset, flag1, rt;
+ offset = 0;
+ rt = aio_read ( &aio1 );
+
+ while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
+ {
+ int start = 0;
+
+ while ( start < offset )
+ {
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < 0 )
+ {
+ fprintf ( stderr, "Read len %d.\n", lenBuffer[read_c] );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+ }
+
+ if ( read_c > maxReadNum - 1024 )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ kmer_c = read_c = 0;
+ }
+
+ if ( flag1 == 2 )
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ if ( read_c )
+ {
+ kmerCounter[0] += kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //singleKmer
+ }
+
+ time ( &stop_t );
+ fprintf ( stderr, "Time spent on hashing reads: %ds, %lld read(s) processed.\n", ( int ) ( stop_t - start_t ), i );
+
+ //record insert size info
+ if ( pairs )
+ {
+ if ( gradsCounter )
+ {
+ fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound );
+ }
+ else
+ {
+ fprintf ( stderr, "No paired reads found.\n" );
+ }
+
+ sprintf ( name, "%s.peGrads", outfile );
+ fo = ckopen ( name, "w" );
+ fprintf ( fo, "grads&num: %d\t%lld\n", gradsCounter, n_solexa );
+
+ for ( i = 0; i < gradsCounter; i++ )
+ {
+ fprintf ( fo, "%d\t%lld\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank );
+ }
+
+ fclose ( fo );
+ }
+
+ free_pe_mem ();
+ free_libs ();
+
+ if ( 1 )
+ {
+ unsigned long long alloCounter = 0;
+ unsigned long long allKmerCounter = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ alloCounter += count_kmerset ( ( KmerSets[i] ) );
+ allKmerCounter += kmerCounter[i + 1];
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+
+ fprintf ( stderr, "%lli node(s) allocated, %lli kmer(s) in reads, %lli kmer(s) processed.\n", alloCounter, kmerCounter[0], allKmerCounter );
+ }
+
+ free ( ( void * ) rcSeq );
+ free ( ( void * ) kmerCounter );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ free ( ( void * ) lenBuffer );
+ free ( ( void * ) indexArray );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) hashBanBuffer );
+ free ( ( void * ) nextcBuffer );
+ free ( ( void * ) prevcBuffer );
+ free ( ( void * ) next_name );
+ free ( ( void * ) aioBuffer1 );
+ free ( ( void * ) aioBuffer2 );
+ free ( ( void * ) readBuffer1 );
+ free ( ( void * ) readBuffer2 );
+ free ( ( void * ) cach1 );
+ free ( ( void * ) cach2 );
+ fprintf ( stderr, "done hashing nodes\n" );
+
+ if ( deLowKmer )
+ {
+ time ( &start_t );
+ deLowCov ( thrdSignal );
+ time ( &stop_t );
+ fprintf ( stderr, "Time spent on delowcvgNode: %ds.\n", ( int ) ( stop_t - start_t ) );
+ }
+
+ time ( &start_t );
+ Mark1in1outNode ( thrdSignal );
+ freqStat ( outfile );
+ time ( &stop_t );
+ fprintf ( stderr, "Time spent on marking linear nodes: %ds.\n", ( int ) ( stop_t - start_t ) );
+ sendWorkSignal ( 3, thrdSignal ); //exit
+ thread_wait ( threads );
+ return 1;
}
-void initAIO ( struct aiocb * aio, char * buf, int fd, int size )
+void initAIO ( struct aiocb *aio, char *buf, int fd, int size )
{
- bzero ( aio, sizeof ( struct aiocb ) );
- aio->aio_buf = ( void * ) buf;
- aio->aio_fildes = fd;
- aio->aio_nbytes = size;
- aio->aio_offset = 0;
+ bzero ( aio, sizeof ( struct aiocb ) );
+ aio->aio_buf = ( void * ) buf;
+ aio->aio_fildes = fd;
+ aio->aio_nbytes = size;
+ aio->aio_offset = 0;
}
-int AIORead ( struct aiocb * mycb, int * offset, char * buf, char * cach, int * rt, int curr_type )
+int AIORead ( struct aiocb *mycb, int *offset, char *buf, char *cach, int *rt, int curr_type )
{
- int i, i2, i3, j;
- int num;
- size_t mode, get, max_list;
-
- // rt = aio_read(mycb);
- if ( *rt == 0 )
- {
- struct aiocb * aiocb_list[1];
- aiocb_list[0] = mycb;
- max_list = 1;
-
- while ( 1 )
- {
- mode = aio_suspend ( ( const struct aiocb * const * ) aiocb_list, max_list, NULL );
-
- if ( mode == -1 )
- {
- if ( errno != EAGAIN && errno != EINTR )
- {
- fprintf ( stderr, "Error:%s.\n", errno );
- return 0;
- }
- else
- { continue; }
- }
- else
- {
- //while(aio_error(mycb) == EINPROGRESS);
- get = aio_return ( mycb );
- j = strlen ( cach );
-
- if ( get > 0 )
- {
- char * temp = ( char * ) ( ( *mycb ).aio_buf );
-
- if ( ( get % 32768 ) != 0 )
- {
- strcpy ( buf, cach );
- memcpy ( &buf[j], temp, get );
- memset ( cach, '\0', j );
- //printf("%s",buf);
- *offset = j + get;
- return 2;
- }
-
- if ( ( curr_type == 2 ) || ( curr_type == 6 ) )
- {
- num = 0;
-
- for ( i = get - 1; ( temp[i] != '@' ) || ( temp[i - 1] != '\n' ); i-- )
- {
- if ( temp[i] == '\n' ) {num++;}
- }
-
- if ( num <= 1 )
- {
- for ( i2 = i - 2; temp[i2] != '\n'; i2-- ) { ; }
-
- if ( temp[i2 + 1] == '+' )
- {
- for ( i2 = i2 - 1; temp[i2] != '\n'; i2-- ) { ; }
-
- if ( temp[i2 + 1] != '+' ) {for ( i = i2 - 1; ( temp[i] != '@' ) || ( temp[i - 1] != '\n' ); i-- ) { ; }}
- }
- }
- }
- else if ( ( curr_type == 1 ) || ( curr_type == 3 ) || ( curr_type == 5 ) )
- for ( i = get - 1; temp[i] != '>'; i-- ) { ; }
-
- //for (i = get - 1; temp[i] != '>' && temp[i] != '@'; i--) ;
- strcpy ( buf, cach );
- memcpy ( &buf[j], temp, i );
- //printf("%s",buf);
- *offset = i + j;
- memset ( cach, '\0', j );
- memcpy ( cach, &temp[i], get - i );
- ( *mycb ).aio_offset += get;
- *rt = aio_read ( mycb );
- return 1;
- }
- else
- {
- fprintf(stderr, "Error: aio_return error.\n");
- }
- /*else
- {
- char *temp = (char *)((*mycb).aio_buf);
- strcpy(buf,cach);
- strcpy(&buf[j],temp);
- *offset = j + get;
- return 2;
- } */
- }
- }
- }
- else
- {
- fprintf(stderr, "Error: (*rt != 0) in AIORead.\n");
- }
-
- return 0;
+ int i, i2, i3, j;
+ int num;
+ size_t mode, get, max_list;
+
+ // rt = aio_read(mycb);
+ if ( *rt == 0 )
+ {
+ struct aiocb *aiocb_list[1];
+ aiocb_list[0] = mycb;
+ max_list = 1;
+
+ while ( 1 )
+ {
+ mode = aio_suspend ( ( const struct aiocb * const * ) aiocb_list, max_list, NULL );
+
+ if ( mode == -1 )
+ {
+ if ( errno != EAGAIN && errno != EINTR )
+ {
+ fprintf ( stderr, "Error:%s.\n", strerror ( errno ) ); // errno is an int; %s needs the message string
+ return 0;
+ }
+ else
+ {
+ continue;
+ }
+ }
+ else
+ {
+ //while(aio_error(mycb) == EINPROGRESS);
+ get = aio_return ( mycb );
+ j = strlen ( cach );
+
+ if ( get > 0 )
+ {
+ char *temp = ( char * ) ( ( *mycb ).aio_buf );
+
+ if ( ( get % 32768 ) != 0 )
+ {
+ strcpy ( buf, cach );
+ memcpy ( &buf[j], temp, get );
+ memset ( cach, '\0', j );
+ //printf("%s",buf);
+ *offset = j + get;
+ return 2;
+ }
+
+ if ( ( curr_type == 2 ) || ( curr_type == 6 ) )
+ {
+ num = 0;
+
+ for ( i = get - 1; ( temp[i] != '@' ) || ( temp[i - 1] != '\n' ); i-- )
+ {
+ if ( temp[i] == '\n' )
+ {
+ num++;
+ }
+ }
+
+ if ( num <= 1 )
+ {
+ for ( i2 = i - 2; temp[i2] != '\n'; i2-- )
+ {
+ ;
+ }
+
+ if ( temp[i2 + 1] == '+' )
+ {
+ for ( i2 = i2 - 1; temp[i2] != '\n'; i2-- )
+ {
+ ;
+ }
+
+ if ( temp[i2 + 1] != '+' )
+ {
+ for ( i = i2 - 1; ( temp[i] != '@' ) || ( temp[i - 1] != '\n' ); i-- )
+ {
+ ;
+ }
+ }
+ }
+ }
+ }
+ else if ( ( curr_type == 1 ) || ( curr_type == 3 ) || ( curr_type == 5 ) )
+ for ( i = get - 1; temp[i] != '>'; i-- )
+ {
+ ;
+ }
+
+ //for (i = get - 1; temp[i] != '>' && temp[i] != '@'; i--) ;
+ strcpy ( buf, cach );
+ memcpy ( &buf[j], temp, i );
+ //printf("%s",buf);
+ *offset = i + j;
+ memset ( cach, '\0', j );
+ memcpy ( cach, &temp[i], get - i );
+ ( *mycb ).aio_offset += get;
+ *rt = aio_read ( mycb );
+ return 1;
+ }
+ else
+ {
+ fprintf(stderr, "Error: aio_return error.\n");
+ }
+
+ /*else
+ {
+ char *temp = (char *)((*mycb).aio_buf);
+ strcpy(buf,cach);
+ strcpy(&buf[j],temp);
+ *offset = j + get;
+ return 2;
+ } */
+ }
+ }
+ }
+ else
+ {
+ fprintf(stderr, "Error: (*rt != 0) in AIORead.\n");
+ }
+
+ return 0;
}
-boolean openNextFile ( int * libNo, boolean pairs, unsigned char asm_ctg )
+boolean openNextFile ( int *libNo, boolean pairs, unsigned char asm_ctg )
{
- int i = *libNo;
- int prevLib = i;
-
- if ( lib_array[i].fp1 )
- { closeFp1InLab ( i ); }
-
- if ( lib_array[i].fp2 )
- { closeFp2InLab ( i ); }
-
- *libNo = nextValidIndex ( i, pairs, asm_ctg );
- i = *libNo;
-
- if ( lib_array[i].rd_len_cutoff > 0 )
- { maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ? lib_array[i].rd_len_cutoff : maxReadLen4all; }
- else
- { maxReadLen = maxReadLen4all; }
-
- //record insert size info
- //printf("from lib %d to %d, read %lld to %ld\n",prevLib,i,readNumBack,n_solexa);
- if ( pairs && i != prevLib )
- {
- if ( readNumBack < n_solexa )
- {
- pes[gradsCounter].PE_bound = n_solexa;
- pes[gradsCounter].rank = lib_array[prevLib].rank;
- pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
- pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
- readNumBack = n_solexa;
- }
- }
-
- if ( i >= num_libs )
- { return 0; }
-
- openFileInLib ( i );
- return 1;
+ int i = *libNo;
+ int prevLib = i;
+
+ if ( lib_array[i].fp1 )
+ {
+ closeFp1InLab ( i );
+ }
+
+ if ( lib_array[i].fp2 )
+ {
+ closeFp2InLab ( i );
+ }
+
+ *libNo = nextValidIndex ( i, pairs, asm_ctg );
+ i = *libNo;
+
+ if ( lib_array[i].rd_len_cutoff > 0 )
+ {
+ maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ? lib_array[i].rd_len_cutoff : maxReadLen4all;
+ }
+ else
+ {
+ maxReadLen = maxReadLen4all;
+ }
+
+ //record insert size info
+ //printf("from lib %d to %d, read %lld to %ld\n",prevLib,i,readNumBack,n_solexa);
+ if ( pairs && i != prevLib )
+ {
+ if ( readNumBack < n_solexa )
+ {
+ pes[gradsCounter].PE_bound = n_solexa;
+ pes[gradsCounter].rank = lib_array[prevLib].rank;
+ pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
+ pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
+ readNumBack = n_solexa;
+ }
+ }
+
+ if ( i >= num_libs )
+ {
+ return 0;
+ }
+
+ openFileInLib ( i );
+ return 1;
}
-static void thread_delow ( KmerSet * set, unsigned char thrdID )
+static void thread_delow ( KmerSet *set, unsigned char thrdID )
{
- int i, in_num, out_num, cvgSingle;
- int l_cvg, r_cvg;
- kmer_t * rs;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- in_num = out_num = l_cvg = r_cvg = 0;
- rs = set->array + set->iter_ptr;
-
- for ( i = 0; i < 4; i++ )
- {
- cvgSingle = get_kmer_left_cov ( *rs, i );
-
- if ( cvgSingle > 0 && cvgSingle <= deLowKmer )
- {
- set_kmer_left_cov ( *rs, i, 0 );
- }
-
- cvgSingle = get_kmer_right_cov ( *rs, i );
-
- if ( cvgSingle > 0 && cvgSingle <= deLowKmer )
- {
- set_kmer_right_cov ( *rs, i, 0 );
- }
- }
-
- if ( rs->l_links == 0 && rs->r_links == 0 )
- {
- rs->deleted = 1;
- tips[thrdID]++;
- }
- }
-
- set->iter_ptr++;
- }
-
- //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
+ int i, in_num, out_num, cvgSingle;
+ int l_cvg, r_cvg;
+ kmer_t *rs;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ in_num = out_num = l_cvg = r_cvg = 0;
+ rs = set->array + set->iter_ptr;
+
+ for ( i = 0; i < 4; i++ )
+ {
+ cvgSingle = get_kmer_left_cov ( *rs, i );
+
+ if ( cvgSingle > 0 && cvgSingle <= deLowKmer )
+ {
+ set_kmer_left_cov ( *rs, i, 0 );
+ }
+
+ cvgSingle = get_kmer_right_cov ( *rs, i );
+
+ if ( cvgSingle > 0 && cvgSingle <= deLowKmer )
+ {
+ set_kmer_right_cov ( *rs, i, 0 );
+ }
+ }
+
+ if ( rs->l_links == 0 && rs->r_links == 0 )
+ {
+ rs->deleted = 1;
+ tips[thrdID]++;
+ }
+ }
+
+ set->iter_ptr++;
+ }
+
+ //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
}
-static void deLowCov ( unsigned char * thrdSignal )
+static void deLowCov ( unsigned char *thrdSignal )
{
- int i;
- long long counter = 0;
- tips = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
+ int i;
+ long long counter = 0;
+ tips = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
- for ( i = 0; i < thrd_num; i++ )
- {
- tips[i] = 0;
- }
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ tips[i] = 0;
+ }
- sendWorkSignal ( 5, thrdSignal ); //mark linear nodes
+ sendWorkSignal ( 5, thrdSignal ); //signal 5; the tips[] totals are reported below as removed kmers (presumably dispatches thread_delow, not linear-node marking -- confirm in threadRoutine)
- for ( i = 0; i < thrd_num; i++ )
- {
- counter += tips[i];
- }
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ counter += tips[i];
+ }
- free ( ( void * ) tips );
- fprintf ( stderr, "%lld kmer(s) removed.\n", counter );
+ free ( ( void * ) tips );
+ fprintf ( stderr, "%lld kmer(s) removed.\n", counter );
}
-static void thread_mark ( KmerSet * set, unsigned char thrdID )
+static void thread_mark ( KmerSet *set, unsigned char thrdID )
{
- int i, in_num, out_num, cvgSingle;
- int l_cvg, r_cvg;
- kmer_t * rs;
- long long counter = 0;
- set->iter_ptr = 0;
-
- while ( set->iter_ptr < set->size )
- {
- if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
- {
- in_num = out_num = l_cvg = r_cvg = 0;
- rs = set->array + set->iter_ptr;
-
- for ( i = 0; i < 4; i++ )
- {
- cvgSingle = get_kmer_left_cov ( *rs, i );
-
- if ( cvgSingle > 0 )
- {
- in_num++;
- l_cvg += cvgSingle;
- }
-
- cvgSingle = get_kmer_right_cov ( *rs, i );
-
- if ( cvgSingle > 0 )
- {
- out_num++;
- r_cvg += cvgSingle;
- }
- }
-
- if ( rs->single )
- {
- kmerFreq[thrdID][1]++;
- counter++;
- }
- else
- {
- kmerFreq[thrdID][ ( l_cvg > r_cvg ? l_cvg : r_cvg )]++;
- }
-
- if ( in_num == 1 && out_num == 1 )
- {
- rs->linear = 1;
- tips[thrdID]++;
- }
- }
-
- set->iter_ptr++;
- }
-
- //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
+ int i, in_num, out_num, cvgSingle;
+ int l_cvg, r_cvg;
+ kmer_t *rs;
+ long long counter = 0;
+ set->iter_ptr = 0;
+
+ while ( set->iter_ptr < set->size )
+ {
+ if ( !is_kmer_entity_null ( set->flags, set->iter_ptr ) )
+ {
+ in_num = out_num = l_cvg = r_cvg = 0;
+ rs = set->array + set->iter_ptr;
+
+ for ( i = 0; i < 4; i++ )
+ {
+ cvgSingle = get_kmer_left_cov ( *rs, i );
+
+ if ( cvgSingle > 0 )
+ {
+ in_num++;
+ l_cvg += cvgSingle;
+ }
+
+ cvgSingle = get_kmer_right_cov ( *rs, i );
+
+ if ( cvgSingle > 0 )
+ {
+ out_num++;
+ r_cvg += cvgSingle;
+ }
+ }
+
+ if ( rs->single )
+ {
+ kmerFreq[thrdID][1]++;
+ counter++;
+ }
+ else
+ {
+ kmerFreq[thrdID][ ( l_cvg > r_cvg ? l_cvg : r_cvg )]++;
+ }
+
+ if ( in_num == 1 && out_num == 1 )
+ {
+ rs->linear = 1;
+ tips[thrdID]++;
+ }
+ }
+
+ set->iter_ptr++;
+ }
+
+ //printf("%lld single nodes, %lld linear\n",counter,tips[thrdID]);
}
-static void Mark1in1outNode ( unsigned char * thrdSignal )
+static void Mark1in1outNode ( unsigned char *thrdSignal )
{
- int i;
- long long counter = 0;
- tips = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
- kmerFreq = ( long long ** ) ckalloc ( thrd_num * sizeof ( long long * ) );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- kmerFreq[i] = ( long long * ) ckalloc ( 257 * sizeof ( long long ) );
- memset ( kmerFreq[i], 0, 257 * sizeof ( long long ) );
- tips[i] = 0;
- }
-
- sendWorkSignal ( 4, thrdSignal ); //mark linear nodes
-
- for ( i = 0; i < thrd_num; i++ )
- {
- counter += tips[i];
- }
-
- free ( ( void * ) tips );
- fprintf ( stderr, "%lld linear node(s) marked.\n", counter );
+ int i;
+ long long counter = 0;
+ tips = ( long long * ) ckalloc ( thrd_num * sizeof ( long long ) );
+ kmerFreq = ( long long ** ) ckalloc ( thrd_num * sizeof ( long long * ) );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ kmerFreq[i] = ( long long * ) ckalloc ( 257 * sizeof ( long long ) );
+ memset ( kmerFreq[i], 0, 257 * sizeof ( long long ) );
+ tips[i] = 0;
+ }
+
+ sendWorkSignal ( 4, thrdSignal ); //mark linear nodes
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ counter += tips[i];
+ }
+
+ free ( ( void * ) tips );
+ fprintf ( stderr, "%lld linear node(s) marked.\n", counter );
}
-static void freqStat ( char * outfile )
+static void freqStat ( char *outfile )
{
- FILE * fo;
- char name[256];
- int i, j;
- long long sum;
- sprintf ( name, "%s.kmerFreq", outfile );
- fo = ckopen ( name, "w" );
-
- for ( i = 1; i < 256; i++ )
- {
- sum = 0;
-
- for ( j = 0; j < thrd_num; j++ )
- {
- sum += kmerFreq[j][i];
- }
-
- fprintf ( fo, "%lld\n", sum );
- }
-
- for ( i = 0; i < thrd_num; i++ )
- {
- free ( ( void * ) kmerFreq[i] );
- }
-
- free ( ( void * ) kmerFreq );
- fclose ( fo );
+ FILE *fo;
+ char name[256];
+ int i, j;
+ long long sum;
+ snprintf ( name, sizeof ( name ), "%s.kmerFreq", outfile ); // bounded: name holds only 256 bytes and outfile is caller-supplied
+ fo = ckopen ( name, "w" );
+
+ for ( i = 1; i < 256; i++ )
+ {
+ sum = 0;
+
+ for ( j = 0; j < thrd_num; j++ )
+ {
+ sum += kmerFreq[j][i];
+ }
+
+ fprintf ( fo, "%lld\n", sum );
+ }
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ free ( ( void * ) kmerFreq[i] );
+ }
+
+ free ( ( void * ) kmerFreq );
+ fclose ( fo );
}
diff --git a/standardPregraph/prlRead2Ctg.c b/standardPregraph/prlRead2Ctg.c
index b0c70bc..45e7c7b 100644
--- a/standardPregraph/prlRead2Ctg.c
+++ b/standardPregraph/prlRead2Ctg.c
@@ -1,7 +1,7 @@
/*
* prlRead2Ctg.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -36,109 +36,109 @@ static int ALIGNLEN = 0;
//buffer related varibles for chop kmer
static int read_c;
-static char ** rcSeq;
-static char ** seqBuffer;
-static int * lenBuffer;
-static unsigned int * ctgIdArray;
-static int * posArray;
-static char * orienArray;
-static char * footprint; // flag indicates whether the read shoulld leave markers on contigs
-static char ** read_name;
+static char **rcSeq;
+static char **seqBuffer;
+static int *lenBuffer;
+static unsigned int *ctgIdArray;
+static int *posArray;
+static char *orienArray;
+static char *footprint; // flag indicates whether the read should leave markers on contigs
+static char **read_name;
// kmer related variables
static int kmer_c;
-static Kmer * kmerBuffer;
-static ubyte8 * hashBanBuffer;
-static kmer_t ** nodeBuffer;
-static boolean * smallerBuffer;
-static unsigned int * indexArray;
-static int * insSizeArray;
-
-static int * deletion;
+static Kmer *kmerBuffer;
+static ubyte8 *hashBanBuffer;
+static kmer_t **nodeBuffer;
+static boolean *smallerBuffer;
+static unsigned int *indexArray;
+static int *insSizeArray;
+
+static int *deletion;
static void parse1read ( int t );
-static void threadRoutine ( void * thrdID );
-static void searchKmer ( int t, KmerSet * kset );
+static void threadRoutine ( void *thrdID );
+static void searchKmer ( int t, KmerSet *kset );
static void chopKmer4read ( int t, int threadID );
-static void thread_wait ( pthread_t * threads );
+static void thread_wait ( pthread_t *threads );
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i, t;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- for ( i = 0; i < kmer_c; i++ )
- {
- if ( ( hashBanBuffer[i] % thrd_num ) != id )
- {
- continue;
- }
-
- searchKmer ( i, KmerSets[id] );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 2 )
- {
- for ( i = 0; i < read_c; i++ )
- {
- if ( i % thrd_num != id )
- {
- continue;
- }
-
- chopKmer4read ( i, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 3 )
- {
- // parse reads
- for ( t = 0; t < read_c; t++ )
- {
- if ( t % thrd_num != id )
- {
- continue;
- }
-
- parse1read ( t );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 5 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ int i, t;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ for ( i = 0; i < kmer_c; i++ )
+ {
+ if ( ( hashBanBuffer[i] % thrd_num ) != id )
+ {
+ continue;
+ }
+
+ searchKmer ( i, KmerSets[id] );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 2 )
+ {
+ for ( i = 0; i < read_c; i++ )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ chopKmer4read ( i, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 3 )
+ {
+ // parse reads
+ for ( t = 0; t < read_c; t++ )
+ {
+ if ( t % thrd_num != id )
+ {
+ continue;
+ }
+
+ parse1read ( t );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 5 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+
+ usleep ( 1 );
+ }
}
/*
@@ -152,97 +152,97 @@ static void chopReads()
*/
static void chopKmer4read ( int t, int threadID )
{
- int len_seq = lenBuffer[t];
-
- if ( len_seq < overlaplen + 1 )
- {
- return;
- }
-
- char * src_seq = seqBuffer[t];
- char * bal_seq = rcSeq[threadID];
- int j, bal_j;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- int index;
+ int len_seq = lenBuffer[t];
+
+ if ( len_seq < overlaplen + 1 )
+ {
+ return;
+ }
+
+ char *src_seq = seqBuffer[t];
+ char *bal_seq = rcSeq[threadID];
+ int j, bal_j;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ int index;
#ifdef MER127
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word.high = word.low = 0;
+ word.high = word.low = 0;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlaplen );
- bal_j = len_seq - 0 - overlaplen;
- index = indexArray[t];
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- hashBanBuffer[index++] = hash_ban;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- hashBanBuffer[index++] = bal_hash_ban;
- }
-
- for ( j = 1; j <= len_seq - overlaplen; j++ )
- {
- word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
- bal_j = len_seq - j - overlaplen;
- bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- hashBanBuffer[index++] = hash_ban;
- }
- else
- {
- // complementary node
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- hashBanBuffer[index++] = bal_hash_ban;
- }
- }
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlaplen );
+ bal_j = len_seq - 0 - overlaplen;
+ index = indexArray[t];
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ hashBanBuffer[index++] = hash_ban;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ hashBanBuffer[index++] = bal_hash_ban;
+ }
+
+ for ( j = 1; j <= len_seq - overlaplen; j++ )
+ {
+ word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
+ bal_j = len_seq - j - overlaplen;
+ bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ hashBanBuffer[index++] = hash_ban;
+ }
+ else
+ {
+ // complementary node
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ hashBanBuffer[index++] = bal_hash_ban;
+ }
+ }
}
//splay for one kmer in buffer and save the node to nodeBuffer
-static void searchKmer ( int t, KmerSet * kset )
+static void searchKmer ( int t, KmerSet *kset )
{
- kmer_t * node;
- boolean found = search_kmerset ( kset, kmerBuffer[t], &node );
-
- if ( found && !node->deleted )
- {
- nodeBuffer[t] = node;
- }
- else
- {
- nodeBuffer[t] = NULL;
- }
+ kmer_t *node;
+ boolean found = search_kmerset ( kset, kmerBuffer[t], &node );
+
+ if ( found && !node->deleted )
+ {
+ nodeBuffer[t] = node;
+ }
+ else
+ {
+ nodeBuffer[t] = NULL;
+ }
}
/*************************************************
@@ -259,283 +259,289 @@ Return:
*************************************************/
static void parse1read ( int t )
{
- unsigned int j, i, s;
- unsigned int contigID;
- int counter = 0, counter2 = 0;
- unsigned int ctgLen, pos = 0;
- kmer_t * node;
- boolean isSmaller;
- int flag, maxOcc = 0;
- kmer_t * maxNode = NULL;
- int alldgnLen = lenBuffer[t] > ALIGNLEN ? ALIGNLEN : lenBuffer[t];
- int multi = alldgnLen - overlaplen + 1 < 2 ? 2 : alldgnLen - overlaplen + 1;
- unsigned int start, finish;
- footprint[t] = 0;
- start = indexArray[t];
- finish = indexArray[t + 1];
-
- if ( finish == start )
- {
- ctgIdArray[t] = 0;
- return;
- }
-
- for ( j = start; j < finish; j++ )
- {
- node = nodeBuffer[j];
-
- if ( !node ) //same as previous
- {
- continue;
- }
-
- flag = 1;
-
- for ( s = j + 1; s < finish; s++ )
- {
- if ( !nodeBuffer[s] )
- {
- continue;
- }
-
- if ( nodeBuffer[s]->l_links == node->l_links )
- {
- flag++;
- nodeBuffer[s] = NULL;
- }
- }
-
- if ( ( overlaplen < 32 && flag >= 2 ) || overlaplen > 32 )
- {
- counter2++;
- }
-
- if ( flag >= multi )
- {
- counter++;
- }
- else
- {
- continue;
- }
-
- if ( flag > maxOcc )
- {
- pos = j;
- maxOcc = flag;
- maxNode = node;
- }
- }
-
- if ( !counter )
- {
- ctgIdArray[t] = 0;
- return;
- }
-
- if ( counter2 > 1 )
- {
- footprint[t] = 1;
- }
-
- j = pos;
- i = pos - start + 1;
- node = nodeBuffer[j];
- isSmaller = smallerBuffer[j];
- contigID = node->l_links;
- ctgLen = contig_array[contigID].length;
- pos = node->r_links;
-
- if ( node->twin == isSmaller )
- {
- orienArray[t] = '-';
- ctgIdArray[t] = getTwinCtg ( contigID );
- posArray[t] = ctgLen - pos - overlaplen - i + 1;
- }
- else
- {
- orienArray[t] = '+';
- ctgIdArray[t] = contigID;
- posArray[t] = pos - i + 1;
- }
+ unsigned int j, i, s;
+ unsigned int contigID;
+ int counter = 0, counter2 = 0;
+ unsigned int ctgLen, pos = 0;
+ kmer_t *node;
+ boolean isSmaller;
+ int flag, maxOcc = 0;
+ kmer_t *maxNode = NULL;
+ int alldgnLen = lenBuffer[t] > ALIGNLEN ? ALIGNLEN : lenBuffer[t];
+ int multi = alldgnLen - overlaplen + 1 < 2 ? 2 : alldgnLen - overlaplen + 1;
+ unsigned int start, finish;
+ footprint[t] = 0;
+ start = indexArray[t];
+ finish = indexArray[t + 1];
+
+ if ( finish == start )
+ {
+ ctgIdArray[t] = 0;
+ return;
+ }
+
+ for ( j = start; j < finish; j++ )
+ {
+ node = nodeBuffer[j];
+
+ if ( !node ) //same as previous
+ {
+ continue;
+ }
+
+ flag = 1;
+
+ for ( s = j + 1; s < finish; s++ )
+ {
+ if ( !nodeBuffer[s] )
+ {
+ continue;
+ }
+
+ if ( nodeBuffer[s]->l_links == node->l_links )
+ {
+ flag++;
+ nodeBuffer[s] = NULL;
+ }
+ }
+
+ if ( ( overlaplen < 32 && flag >= 2 ) || overlaplen > 32 )
+ {
+ counter2++;
+ }
+
+ if ( flag >= multi )
+ {
+ counter++;
+ }
+ else
+ {
+ continue;
+ }
+
+ if ( flag > maxOcc )
+ {
+ pos = j;
+ maxOcc = flag;
+ maxNode = node;
+ }
+ }
+
+ if ( !counter )
+ {
+ ctgIdArray[t] = 0;
+ return;
+ }
+
+ if ( counter2 > 1 )
+ {
+ footprint[t] = 1;
+ }
+
+ j = pos;
+ i = pos - start + 1;
+ node = nodeBuffer[j];
+ isSmaller = smallerBuffer[j];
+ contigID = node->l_links;
+ ctgLen = contig_array[contigID].length;
+ pos = node->r_links;
+
+ if ( node->twin == isSmaller )
+ {
+ orienArray[t] = '-';
+ ctgIdArray[t] = getTwinCtg ( contigID );
+ posArray[t] = ctgLen - pos - overlaplen - i + 1;
+ }
+ else
+ {
+ orienArray[t] = '+';
+ ctgIdArray[t] = contigID;
+ posArray[t] = pos - i + 1;
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
static void locate1read ( int t )
{
- int i, j, start, finish;
- kmer_t * node;
- unsigned int contigID;
- int pos, ctgLen;
- boolean isSmaller;
- start = indexArray[t];
- finish = indexArray[t + 1];
-
- for ( j = start; j < finish; j++ )
- {
- node = nodeBuffer[j];
-
- if ( !node ) //same as previous
- {
- continue;
- }
-
- i = j - start + 1;
- isSmaller = smallerBuffer[j];
- contigID = node->l_links;
- ctgLen = contig_array[contigID].length;
- pos = node->r_links;
-
- if ( node->twin == isSmaller )
- {
- ctgIdArray[t] = getTwinCtg ( contigID );
- posArray[t] = ctgLen - pos - overlaplen - i + 1;
- }
- else
- {
- ctgIdArray[t] = contigID;
- posArray[t] = pos - i + 1;
- }
- }
+ int i, j, start, finish;
+ kmer_t *node;
+ unsigned int contigID;
+ int pos, ctgLen;
+ boolean isSmaller;
+ start = indexArray[t];
+ finish = indexArray[t + 1];
+
+ for ( j = start; j < finish; j++ )
+ {
+ node = nodeBuffer[j];
+
+ if ( !node ) //same as previous
+ {
+ continue;
+ }
+
+ i = j - start + 1;
+ isSmaller = smallerBuffer[j];
+ contigID = node->l_links;
+ ctgLen = contig_array[contigID].length;
+ pos = node->r_links;
+
+ if ( node->twin == isSmaller )
+ {
+ ctgIdArray[t] = getTwinCtg ( contigID );
+ posArray[t] = ctgLen - pos - overlaplen - i + 1;
+ }
+ else
+ {
+ ctgIdArray[t] = contigID;
+ posArray[t] = pos - i + 1;
+ }
+ }
}
-static void output1read_gz ( int t, gzFile * outfp, gzFile * outfp2, char orien, int dhflag )
+static void output1read_gz ( int t, gzFile *outfp, gzFile *outfp2, char orien, int dhflag )
{
- int len = lenBuffer[t];
- int index;
- readsInGap++;
+ int len = lenBuffer[t];
+ int index;
+ readsInGap++;
- for ( index = 0; index < len; index++ )
- {
- writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
- }
+ for ( index = 0; index < len; index++ )
+ {
+ writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
+ }
- gzwrite ( outfp, &len, sizeof ( int ) );
- gzwrite ( outfp, &ctgIdArray[t], sizeof ( int ) );
- gzwrite ( outfp, &posArray[t], sizeof ( int ) );
- gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len / 4 + 1 ) );
+ gzwrite ( outfp, &len, sizeof ( int ) );
+ gzwrite ( outfp, &ctgIdArray[t], sizeof ( int ) );
+ gzwrite ( outfp, &posArray[t], sizeof ( int ) );
+ gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len / 4 + 1 ) );
- if ( fill && insSizeArray[t] < 2000 && len > 0 )
- {
- gzprintf ( outfp2, ">%d\t%d\t%d\t%c\t%d\t%d\n", len, ctgIdArray[t], posArray[t], orien, insSizeArray[t], dhflag );
+ if ( fill && insSizeArray[t] < 2000 && len > 0 )
+ {
+ gzprintf ( outfp2, ">%d\t%d\t%d\t%c\t%d\t%d\n", len, ctgIdArray[t], posArray[t], orien, insSizeArray[t], dhflag );
- for ( index = 0; index < len; index++ )
- { gzprintf ( outfp2, "%c", int2base ( ( int ) seqBuffer[t][index] ) ); }
+ for ( index = 0; index < len; index++ )
+ {
+ gzprintf ( outfp2, "%c", int2base ( ( int ) seqBuffer[t][index] ) );
+ }
- gzprintf ( outfp2, "\n" );
- }
+ gzprintf ( outfp2, "\n" );
+ }
}
-static void output1read ( int t, FILE * outfp1, FILE * outfp2, char orien, int dhflag )
+static void output1read ( int t, FILE *outfp1, FILE *outfp2, char orien, int dhflag )
{
- int len = lenBuffer[t];
- int index;
- readsInGap++;
-
- /*
- if(ctgIdArray[t]==735||ctgIdArray[t]==getTwinCtg(735)){
- printf("%d\t%d\t%d\t",t+1,ctgIdArray[t],posArray[t]);
- int j;
- for(j=0;j<len;j++)
- printf("%c",int2base((int)seqBuffer[t][j]));
- printf("\n");
- }
- */
- for ( index = 0; index < len; index++ )
- {
- writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
- }
-
- fwrite ( &len, sizeof ( int ), 1, outfp1 );
- fwrite ( &ctgIdArray[t], sizeof ( int ), 1, outfp1 );
- fwrite ( &posArray[t], sizeof ( int ), 1, outfp1 );
- fwrite ( rcSeq[1], sizeof ( char ), len / 4 + 1, outfp1 );
-
- if ( fill && insSizeArray[t] < 2000 && len > 0 )
- {
- fprintf ( outfp2, ">%d\t%d\t%d\t%c\t%d\t%d\n", len, ctgIdArray[t], posArray[t], orien, insSizeArray[t], dhflag );
-
- for ( index = 0; index < len; index++ )
- { fprintf ( outfp2, "%c", int2base ( ( int ) seqBuffer[t][index] ) ); }
-
- fprintf ( outfp2, "\n" );
- }
+ int len = lenBuffer[t];
+ int index;
+ readsInGap++;
+
+ /*
+ if(ctgIdArray[t]==735||ctgIdArray[t]==getTwinCtg(735)){
+ printf("%d\t%d\t%d\t",t+1,ctgIdArray[t],posArray[t]);
+ int j;
+ for(j=0;j<len;j++)
+ printf("%c",int2base((int)seqBuffer[t][j]));
+ printf("\n");
+ }
+ */
+ for ( index = 0; index < len; index++ )
+ {
+ writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
+ }
+
+ fwrite ( &len, sizeof ( int ), 1, outfp1 );
+ fwrite ( &ctgIdArray[t], sizeof ( int ), 1, outfp1 );
+ fwrite ( &posArray[t], sizeof ( int ), 1, outfp1 );
+ fwrite ( rcSeq[1], sizeof ( char ), len / 4 + 1, outfp1 );
+
+ if ( fill && insSizeArray[t] < 2000 && len > 0 )
+ {
+ fprintf ( outfp2, ">%d\t%d\t%d\t%c\t%d\t%d\n", len, ctgIdArray[t], posArray[t], orien, insSizeArray[t], dhflag );
+
+ for ( index = 0; index < len; index++ )
+ {
+ fprintf ( outfp2, "%c", int2base ( ( int ) seqBuffer[t][index] ) );
+ }
+
+ fprintf ( outfp2, "\n" );
+ }
}
-static void output1Nread ( int t, FILE * outfp )
+static void output1Nread ( int t, FILE *outfp )
{
- int len = lenBuffer[t];
- int index;
- fprintf ( outfp, "%d\t%d\t%d\n", lenBuffer[t], ctgIdArray[t], posArray[t] );
- fprintf ( outfp, ">%s\n", read_name[t] );
-
- for ( index = 0; index < len; index++ )
- { fprintf ( outfp, "%c", int2base ( ( int ) seqBuffer[t][index] ) ); }
+ int len = lenBuffer[t];
+ int index;
+ fprintf ( outfp, "%d\t%d\t%d\n", lenBuffer[t], ctgIdArray[t], posArray[t] );
+ fprintf ( outfp, ">%s\n", read_name[t] );
+
+ for ( index = 0; index < len; index++ )
+ {
+ fprintf ( outfp, "%c", int2base ( ( int ) seqBuffer[t][index] ) );
+ }
- fprintf ( outfp, "\n" );
+ fprintf ( outfp, "\n" );
}
-static void getPEreadOnContig ( int t, gzFile * outfp )
+static void getPEreadOnContig ( int t, gzFile *outfp )
{
- int len1, len2, index;
- char orien1, orien2;
- len1 = lenBuffer[t - 1];
- len2 = lenBuffer[t];
- orien1 = orienArray[t - 1];
- orien2 = orienArray[t];
-
- if ( insSizeArray[t] < 2000 && insSizeArray[t] == insSizeArray[t - 1] )
- {
- gzwrite ( outfp, &len1, sizeof ( int ) );
- gzwrite ( outfp, &ctgIdArray[t - 1], sizeof ( int ) );
- gzwrite ( outfp, &posArray[t - 1], sizeof ( int ) );
- gzwrite ( outfp, &orien1, sizeof ( char ) );
- gzwrite ( outfp, &insSizeArray[t - 1], sizeof ( int ) );
-
- for ( index = 0; index < len1; index++ )
- {
- writeChar2tightString ( seqBuffer[t - 1][index], rcSeq[1], index );
- }
-
- gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len1 / 4 + 1 ) );
- gzwrite ( outfp, &len2, sizeof ( int ) );
- gzwrite ( outfp, &ctgIdArray[t], sizeof ( int ) );
- gzwrite ( outfp, &posArray[t], sizeof ( int ) );
- gzwrite ( outfp, &orien2, sizeof ( char ) );
- gzwrite ( outfp, &insSizeArray[t], sizeof ( int ) );
-
- for ( index = 0; index < len2; index++ )
- {
- writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
- }
-
- gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len2 / 4 + 1 ) );
- }
+ int len1, len2, index;
+ char orien1, orien2;
+ len1 = lenBuffer[t - 1];
+ len2 = lenBuffer[t];
+ orien1 = orienArray[t - 1];
+ orien2 = orienArray[t];
+
+ if ( insSizeArray[t] < 2000 && insSizeArray[t] == insSizeArray[t - 1] )
+ {
+ gzwrite ( outfp, &len1, sizeof ( int ) );
+ gzwrite ( outfp, &ctgIdArray[t - 1], sizeof ( int ) );
+ gzwrite ( outfp, &posArray[t - 1], sizeof ( int ) );
+ gzwrite ( outfp, &orien1, sizeof ( char ) );
+ gzwrite ( outfp, &insSizeArray[t - 1], sizeof ( int ) );
+
+ for ( index = 0; index < len1; index++ )
+ {
+ writeChar2tightString ( seqBuffer[t - 1][index], rcSeq[1], index );
+ }
+
+ gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len1 / 4 + 1 ) );
+ gzwrite ( outfp, &len2, sizeof ( int ) );
+ gzwrite ( outfp, &ctgIdArray[t], sizeof ( int ) );
+ gzwrite ( outfp, &posArray[t], sizeof ( int ) );
+ gzwrite ( outfp, &orien2, sizeof ( char ) );
+ gzwrite ( outfp, &insSizeArray[t], sizeof ( int ) );
+
+ for ( index = 0; index < len2; index++ )
+ {
+ writeChar2tightString ( seqBuffer[t][index], rcSeq[1], index );
+ }
+
+ gzwrite ( outfp, rcSeq[1], ( unsigned ) ( len2 / 4 + 1 ) );
+ }
}
/*
@@ -560,147 +566,149 @@ static void getPEreadOnContig(int t,FILE* outfp)
}
}*/
-static void getReadIngap ( int t, int insSize, gzFile * outfp1, gzFile * outfp2, boolean readOne )
+static void getReadIngap ( int t, int insSize, gzFile *outfp1, gzFile *outfp2, boolean readOne )
{
- int read1, read2;
- char orientation;
-
- if ( readOne )
- {
- read1 = t;
- read2 = t + 1;
-
- if ( orienArray[read2] == '+' )
- {
- orientation = '-';
- }
- else
- {
- orientation = '+';
- }
-
- ctgIdArray[read1] = ctgIdArray[read2];
- posArray[read1] = posArray[read2] + insSize - lenBuffer[read1];
- output1read_gz ( read1, outfp1, outfp2, orientation, 1 );
- }
- else
- {
- read2 = t;
- read1 = t - 1;
-
- if ( orienArray[read1] == '+' )
- {
- orientation = '-';
- }
- else
- {
- orientation = '+';
- }
-
- ctgIdArray[read2] = ctgIdArray[read1];
- posArray[read2] = posArray[read1] + insSize - lenBuffer[read2]; // --> R1 <-- R2
- output1read_gz ( read2, outfp1, outfp2, orientation, 2 );
- }
+ int read1, read2;
+ char orientation;
+
+ if ( readOne )
+ {
+ read1 = t;
+ read2 = t + 1;
+
+ if ( orienArray[read2] == '+' )
+ {
+ orientation = '-';
+ }
+ else
+ {
+ orientation = '+';
+ }
+
+ ctgIdArray[read1] = ctgIdArray[read2];
+ posArray[read1] = posArray[read2] + insSize - lenBuffer[read1];
+ output1read_gz ( read1, outfp1, outfp2, orientation, 1 );
+ }
+ else
+ {
+ read2 = t;
+ read1 = t - 1;
+
+ if ( orienArray[read1] == '+' )
+ {
+ orientation = '-';
+ }
+ else
+ {
+ orientation = '+';
+ }
+
+ ctgIdArray[read2] = ctgIdArray[read1];
+ posArray[read2] = posArray[read1] + insSize - lenBuffer[read2]; // --> R1 <-- R2
+ output1read_gz ( read2, outfp1, outfp2, orientation, 2 );
+ }
}
-static void recordLongRead ( FILE * outfp1, FILE * outfp2 )
+static void recordLongRead ( FILE *outfp1, FILE *outfp2 )
{
- int t;
+ int t;
- for ( t = 0; t < read_c; t++ )
- {
- readCounter++;
+ for ( t = 0; t < read_c; t++ )
+ {
+ readCounter++;
- if ( footprint[t] )
- {
- output1read ( t, outfp1, outfp2, orienArray[t], 0 );
- }
- }
+ if ( footprint[t] )
+ {
+ output1read ( t, outfp1, outfp2, orienArray[t], 0 );
+ }
+ }
}
-static void recordAlldgn ( gzFile * outfp, int * insSizeArr, gzFile * outfp1, gzFile * outfp2, gzFile * outfp4 )
+static void recordAlldgn ( gzFile *outfp, int *insSizeArr, gzFile *outfp1, gzFile *outfp2, gzFile *outfp4 )
{
- int t, ctgId;
- boolean rd1gap, rd2gap;
- char orientation;
-
- for ( t = 0; t < read_c; t++ )
- {
- readCounter++;
- rd1gap = rd2gap = 0;
- ctgId = ctgIdArray[t];
-
- if ( outfp1 && t % 2 == 1 ) //make sure this is read2 in a pair
- {
- if ( ctgIdArray[t] < 1 && ctgIdArray[t - 1] > 0 )
- {
- getReadIngap ( t, insSizeArr[t], outfp1, outfp2, 0 );
- rd2gap = 1;
- }
- else if ( ctgIdArray[t] > 0 && ctgIdArray[t - 1] < 1 )
- {
- getReadIngap ( t - 1, insSizeArr[t - 1], outfp1, outfp2, 1 );
- rd1gap = 1;
- }
- else if ( ctgIdArray[t] > 0 && ctgIdArray[t - 1] > 0 ) //PE read on contig
- {
- if ( fill )
- { getPEreadOnContig ( t, outfp4 ); }
- }
- }
-
- if ( ctgId < 1 )
- {
- continue;
- }
-
- mapCounter++;
- gzprintf ( outfp, "%lld\t%u\t%d\t%c\n", readCounter, ctgIdArray[t], posArray[t], orienArray[t] );
-
- if ( t % 2 == 0 )
- {
- continue;
- }
-
- // reads are not located by pe info but across edges
- if ( outfp1 && footprint[t - 1] && !rd1gap )
- {
- if ( ctgIdArray[t - 1] < 1 )
- {
- locate1read ( t - 1 );
- }
-
- if ( orienArray[t] == '+' )
- {
- orientation = '-';
- }
- else
- {
- orientation = '+';
- }
-
- output1read_gz ( t - 1, outfp1, outfp2, orientation, 1 ); //read1 in gap.
- }
-
- if ( outfp1 && footprint[t] && !rd2gap )
- {
- if ( ctgIdArray[t] < 1 )
- {
- locate1read ( t );
- }
-
- if ( orienArray[t - 1] == '+' )
- {
- orientation = '-';
- }
- else
- {
- orientation = '+';
- }
-
- output1read_gz ( t, outfp1, outfp2, orientation, 2 ); //read2 in gap.
- }
- }
+ int t, ctgId;
+ boolean rd1gap, rd2gap;
+ char orientation;
+
+ for ( t = 0; t < read_c; t++ )
+ {
+ readCounter++;
+ rd1gap = rd2gap = 0;
+ ctgId = ctgIdArray[t];
+
+ if ( outfp1 && t % 2 == 1 ) //make sure this is read2 in a pair
+ {
+ if ( ctgIdArray[t] < 1 && ctgIdArray[t - 1] > 0 )
+ {
+ getReadIngap ( t, insSizeArr[t], outfp1, outfp2, 0 );
+ rd2gap = 1;
+ }
+ else if ( ctgIdArray[t] > 0 && ctgIdArray[t - 1] < 1 )
+ {
+ getReadIngap ( t - 1, insSizeArr[t - 1], outfp1, outfp2, 1 );
+ rd1gap = 1;
+ }
+ else if ( ctgIdArray[t] > 0 && ctgIdArray[t - 1] > 0 ) //PE read on contig
+ {
+ if ( fill )
+ {
+ getPEreadOnContig ( t, outfp4 );
+ }
+ }
+ }
+
+ if ( ctgId < 1 )
+ {
+ continue;
+ }
+
+ mapCounter++;
+ gzprintf ( outfp, "%lld\t%u\t%d\t%c\n", readCounter, ctgIdArray[t], posArray[t], orienArray[t] );
+
+ if ( t % 2 == 0 )
+ {
+ continue;
+ }
+
+ // reads are not located by pe info but across edges
+ if ( outfp1 && footprint[t - 1] && !rd1gap )
+ {
+ if ( ctgIdArray[t - 1] < 1 )
+ {
+ locate1read ( t - 1 );
+ }
+
+ if ( orienArray[t] == '+' )
+ {
+ orientation = '-';
+ }
+ else
+ {
+ orientation = '+';
+ }
+
+ output1read_gz ( t - 1, outfp1, outfp2, orientation, 1 ); //read1 in gap.
+ }
+
+ if ( outfp1 && footprint[t] && !rd2gap )
+ {
+ if ( ctgIdArray[t] < 1 )
+ {
+ locate1read ( t );
+ }
+
+ if ( orienArray[t - 1] == '+' )
+ {
+ orientation = '-';
+ }
+ else
+ {
+ orientation = '+';
+ }
+
+ output1read_gz ( t, outfp1, outfp2, orientation, 2 ); //read2 in gap.
+ }
+ }
}
@@ -716,42 +724,42 @@ Output:
Return:
None.
*************************************************/
-void basicContigInfo ( char * infile )
+void basicContigInfo ( char *infile )
{
- char name[256], lldne[1024];
- FILE * fp;
- int length, bal_ed, num_all, num_long, index;
- sprintf ( name, "%s.ContigIndex", infile );
- fp = ckopen ( name, "r" );
- fgets ( lldne, sizeof ( lldne ), fp );
- sscanf ( lldne + 8, "%d %d", &num_all, &num_long );
- fprintf ( stderr, "%d edge(s) in the graph.\n", num_all );
- num_ctg = num_all;
- contig_array = ( CONTIG * ) ckalloc ( ( num_all + 1 ) * sizeof ( CONTIG ) );
- fgets ( lldne, sizeof ( lldne ), fp );
- num_long = 0;
-
- while ( fgets ( lldne, sizeof ( lldne ), fp ) != NULL )
- {
- sscanf ( lldne, "%d %d %d", &index, &length, &bal_ed );
- contig_array[++num_long].length = length;
- contig_array[num_long].bal_edge = bal_ed + 1;
-
- if ( index != num_long )
- {
- fprintf ( stderr, "BasicContigInfo: %d vs %d.\n", index, num_long );
- }
-
- if ( bal_ed == 0 )
- {
- continue;
- }
-
- contig_array[++num_long].length = length;
- contig_array[num_long].bal_edge = -bal_ed + 1;
- }
-
- fclose ( fp );
+ char name[256], lldne[1024];
+ FILE *fp;
+ int length, bal_ed, num_all, num_long, index;
+ sprintf ( name, "%s.ContigIndex", infile );
+ fp = ckopen ( name, "r" );
+ fgets ( lldne, sizeof ( lldne ), fp );
+ sscanf ( lldne + 8, "%d %d", &num_all, &num_long );
+ fprintf ( stderr, "%d edge(s) in the graph.\n", num_all );
+ num_ctg = num_all;
+ contig_array = ( CONTIG * ) ckalloc ( ( num_all + 1 ) * sizeof ( CONTIG ) );
+ fgets ( lldne, sizeof ( lldne ), fp );
+ num_long = 0;
+
+ while ( fgets ( lldne, sizeof ( lldne ), fp ) != NULL )
+ {
+ sscanf ( lldne, "%d %d %d", &index, &length, &bal_ed );
+ contig_array[++num_long].length = length;
+ contig_array[num_long].bal_edge = bal_ed + 1;
+
+ if ( index != num_long )
+ {
+ fprintf ( stderr, "BasicContigInfo: %d vs %d.\n", index, num_long );
+ }
+
+ if ( bal_ed == 0 )
+ {
+ continue;
+ }
+
+ contig_array[++num_long].length = length;
+ contig_array[num_long].bal_edge = -bal_ed + 1;
+ }
+
+ fclose ( fp );
}
@@ -768,285 +776,291 @@ Output:
Return:
None.
*************************************************/
-void prlRead2Ctg ( char * libfile, char * outfile )
+void prlRead2Ctg ( char *libfile, char *outfile )
{
- long long i;
- char * src_name, *next_name, name[256];
- FILE * fo2;
- gzFile * fo, *outfp1 = NULL, *outfp2 = NULL, *outfp3 = NULL, *outfp4 = NULL;
- int maxReadNum, libNo, prevLibNo, insSize = 0;
- boolean flag, pairs = 1;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- //init
- maxReadLen = 0;
- maxNameLen = 256;
- scan_libInfo ( libfile );
- alloc_pe_mem ( num_libs );
-
- if ( !maxReadLen )
- {
- maxReadLen = 100;
- }
-
- fprintf ( stderr, "In file: %s, max seq len %d, max name len %d\n", libfile, maxReadLen, maxNameLen );
-
- if ( maxReadLen > maxReadLen4all )
- {
- maxReadLen4all = maxReadLen;
- }
-
- src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
- nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; //make sure paired reads are processed at the same batch
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
- ctgIdArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
- posArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
- orienArray = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
- footprint = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
- insSizeArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
- read_name = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
-
- if ( gLineLen < maxReadLen )
- {
- gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
- }
-
- for ( i = 0; i < maxReadNum; i++ )
- { read_name[i] = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) ); }
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
- thrdSignal[0] = 0;
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- }
-
- if ( !contig_array )
- {
- basicContigInfo ( outfile );
- }
-
- sprintf ( name, "%s.readInGap.gz", outfile );
- outfp1 = gzopen ( name, "wb" );
-
- if ( fill )
- {
- sprintf ( name, "%s.shortreadInGap.gz", outfile );
- outfp2 = gzopen ( name, "w" );
- }
-
- sprintf ( name, "%s.readOnContig.gz", outfile );
- fo = gzopen ( name, "w" );
-
- if ( fill )
- {
- sprintf ( name, "%s.PEreadOnContig.gz", outfile );
- outfp4 = gzopen ( name, "wb" );
- }
-
- gzprintf ( fo, "read\tcontig\tpos\n" );
- readCounter = mapCounter = readsInGap = 0;
- kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
- prevLibNo = -1;
- int type = 0; //decide whether the PE reads is good or bad
-
- while ( ( flag = read1seqInLib ( seqBuffer[read_c], read_name[read_c], & ( lenBuffer[read_c] ), &libNo, pairs, 0, &type ) ) != 0 )
- {
- if ( type == -1 ) //if the reads is bad, go back.
- {
- i--;
-
- if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
- {
- kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
- }
-
- read_c--;
- n_solexa -= 2;
- continue;
- }
-
- if ( libNo != prevLibNo )
- {
- prevLibNo = libNo;
- insSize = lib_array[libNo].avg_ins;
- ALIGNLEN = lib_array[libNo].map_len;
-
- if ( insSize > 1000 )
- {
- ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
- }
- else
- {
- ALIGNLEN = ALIGNLEN < 32 ? 32 : ALIGNLEN;
- }
-
- fprintf ( stderr, "Current insert size is %d, map_len is %d.\n", insSize, ALIGNLEN );
- }
-
- insSizeArray[read_c] = insSize;
-
- if ( insSize > 1000 )
- {
- ALIGNLEN = ALIGNLEN < ( lenBuffer[read_c] / 2 + 1 ) ? ( lenBuffer[read_c] / 2 + 1 ) : ALIGNLEN;
- }
-
- if ( ( ++i ) % 100000000 == 0 )
- {
- fprintf ( stderr, "--- %lldth reads.\n", i );
- }
-
- indexArray[read_c] = kmer_c;
-
- if ( lenBuffer[read_c] >= overlaplen + 1 )
- {
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- }
-
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- recordAlldgn ( fo, insSizeArray, outfp1, outfp2, outfp4 );
- kmer_c = 0;
- read_c = 0;
- }
- }
-
- if ( read_c )
- {
- indexArray[read_c] = kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- recordAlldgn ( fo, insSizeArray, outfp1, outfp2, outfp4 );
- fprintf ( stderr, "\nTotal reads %lld\n", readCounter );
- fprintf ( stderr, "Reads in gaps %lld\n", readsInGap );
- fprintf ( stderr, "Ratio %.1f%%\n", ( float ) readsInGap / readCounter * 100 );
- }
-
- fprintf ( stderr, "Reads on contigs %lld\n", mapCounter );
- fprintf ( stderr, "Ratio %.1f%%\n", ( float ) mapCounter / readCounter * 100 );
- sendWorkSignal ( 5, thrdSignal ); //stop threads
- thread_wait ( threads );
- gzclose ( fo );
- sprintf ( name, "%s.peGrads", outfile );
- fo2 = ckopen ( name, "w" );
- fprintf ( fo2, "grads&num: %d\t%lld\t%d\n", gradsCounter, n_solexa, maxReadLen4all );
-
- if ( pairs )
- {
- if ( gradsCounter )
- { fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound ); }
- else
- {
- fprintf ( stderr, "No paired reads found.\n" );
- }
-
- for ( i = 0; i < gradsCounter; i++ )
- {
- fprintf ( fo2, "%d\t%lld\t%d\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank, pes[i].pair_num_cut );
- }
-
- fclose ( fo2 );
- }
-
- gzclose ( outfp1 );
-
- if ( fill )
- {
- gzclose ( outfp2 );
- gzclose ( outfp4 );
- }
-
- free_pe_mem ();
- free_libs ();
-
- if ( 1 ) // multi-threads
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- free ( ( void * ) rcSeq[i + 1] );
- }
- }
-
- free ( ( void * ) rcSeq );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- free ( ( void * ) seqBuffer[i] );
- }
-
- free ( ( void * ) seqBuffer );
- free ( ( void * ) lenBuffer );
- free ( ( void * ) indexArray );
-
- for ( i = 0; i < maxReadNum; i++ )
- { free ( ( void * ) read_name[i] ); }
-
- free ( ( void * ) read_name );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) smallerBuffer );
- free ( ( void * ) hashBanBuffer );
- free ( ( void * ) nodeBuffer );
- free ( ( void * ) ctgIdArray );
- free ( ( void * ) posArray );
- free ( ( void * ) orienArray );
- free ( ( void * ) footprint );
- free ( ( void * ) insSizeArray );
- free ( ( void * ) src_name );
- free ( ( void * ) next_name );
-
- if ( gLineLen < maxReadLen )
- {
- free ( ( void * ) gStr );
- gStr = NULL;
- }
-
- if ( contig_array )
- {
- free ( ( void * ) contig_array );
- contig_array = NULL;
- }
+ long long i;
+ char *src_name, *next_name, name[256];
+ FILE *fo2;
+ gzFile *fo, *outfp1 = NULL, *outfp2 = NULL, *outfp3 = NULL, *outfp4 = NULL;
+ int maxReadNum, libNo, prevLibNo, insSize = 0;
+ boolean flag, pairs = 1;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ //init
+ maxReadLen = 0;
+ maxNameLen = 256;
+ scan_libInfo ( libfile );
+ alloc_pe_mem ( num_libs );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ fprintf ( stderr, "In file: %s, max seq len %d, max name len %d\n", libfile, maxReadLen, maxNameLen );
+
+ if ( maxReadLen > maxReadLen4all )
+ {
+ maxReadLen4all = maxReadLen;
+ }
+
+ src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
+ nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
+ maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; //make sure paired reads are processed at the same batch
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
+ ctgIdArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
+ posArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+ orienArray = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
+ footprint = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
+ insSizeArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+ read_name = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+
+ if ( gLineLen < maxReadLen )
+ {
+ gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
+ }
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ read_name[i] = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ }
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+ thrdSignal[0] = 0;
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ if ( !contig_array )
+ {
+ basicContigInfo ( outfile );
+ }
+
+ sprintf ( name, "%s.readInGap.gz", outfile );
+ outfp1 = gzopen ( name, "wb" );
+
+ if ( fill )
+ {
+ sprintf ( name, "%s.shortreadInGap.gz", outfile );
+ outfp2 = gzopen ( name, "w" );
+ }
+
+ sprintf ( name, "%s.readOnContig.gz", outfile );
+ fo = gzopen ( name, "w" );
+
+ if ( fill )
+ {
+ sprintf ( name, "%s.PEreadOnContig.gz", outfile );
+ outfp4 = gzopen ( name, "wb" );
+ }
+
+ gzprintf ( fo, "read\tcontig\tpos\n" );
+ readCounter = mapCounter = readsInGap = 0;
+ kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
+ prevLibNo = -1;
+ int type = 0; //decide whether the PE reads is good or bad
+
+ while ( ( flag = read1seqInLib ( seqBuffer[read_c], read_name[read_c], & ( lenBuffer[read_c] ), &libNo, pairs, 0, &type ) ) != 0 )
+ {
+ if ( type == -1 ) //if the reads is bad, go back.
+ {
+ i--;
+
+ if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
+ {
+ kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
+ }
+
+ read_c--;
+ n_solexa -= 2;
+ continue;
+ }
+
+ if ( libNo != prevLibNo )
+ {
+ prevLibNo = libNo;
+ insSize = lib_array[libNo].avg_ins;
+ ALIGNLEN = lib_array[libNo].map_len;
+
+ if ( insSize > 1000 )
+ {
+ ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
+ }
+ else
+ {
+ ALIGNLEN = ALIGNLEN < 32 ? 32 : ALIGNLEN;
+ }
+
+ fprintf ( stderr, "Current insert size is %d, map_len is %d.\n", insSize, ALIGNLEN );
+ }
+
+ insSizeArray[read_c] = insSize;
+
+ if ( insSize > 1000 )
+ {
+ ALIGNLEN = ALIGNLEN < ( lenBuffer[read_c] / 2 + 1 ) ? ( lenBuffer[read_c] / 2 + 1 ) : ALIGNLEN;
+ }
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ indexArray[read_c] = kmer_c;
+
+ if ( lenBuffer[read_c] >= overlaplen + 1 )
+ {
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ }
+
+ read_c++;
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ recordAlldgn ( fo, insSizeArray, outfp1, outfp2, outfp4 );
+ kmer_c = 0;
+ read_c = 0;
+ }
+ }
+
+ if ( read_c )
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ recordAlldgn ( fo, insSizeArray, outfp1, outfp2, outfp4 );
+ fprintf ( stderr, "\nTotal reads %lld\n", readCounter );
+ fprintf ( stderr, "Reads in gaps %lld\n", readsInGap );
+ fprintf ( stderr, "Ratio %.1f%%\n", ( float ) readsInGap / readCounter * 100 );
+ }
+
+ fprintf ( stderr, "Reads on contigs %lld\n", mapCounter );
+ fprintf ( stderr, "Ratio %.1f%%\n", ( float ) mapCounter / readCounter * 100 );
+ sendWorkSignal ( 5, thrdSignal ); //stop threads
+ thread_wait ( threads );
+ gzclose ( fo );
+ sprintf ( name, "%s.peGrads", outfile );
+ fo2 = ckopen ( name, "w" );
+ fprintf ( fo2, "grads&num: %d\t%lld\t%d\n", gradsCounter, n_solexa, maxReadLen4all );
+
+ if ( pairs )
+ {
+ if ( gradsCounter )
+ {
+ fprintf ( stderr, "%d pe insert size, the largest boundary is %lld.\n\n", gradsCounter, pes[gradsCounter - 1].PE_bound );
+ }
+ else
+ {
+ fprintf ( stderr, "No paired reads found.\n" );
+ }
+
+ for ( i = 0; i < gradsCounter; i++ )
+ {
+ fprintf ( fo2, "%d\t%lld\t%d\t%d\n", pes[i].insertS, pes[i].PE_bound, pes[i].rank, pes[i].pair_num_cut );
+ }
+
+ fclose ( fo2 );
+ }
+
+ gzclose ( outfp1 );
+
+ if ( fill )
+ {
+ gzclose ( outfp2 );
+ gzclose ( outfp4 );
+ }
+
+ free_pe_mem ();
+ free_libs ();
+
+ if ( 1 ) // multi-threads
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+ }
+
+ free ( ( void * ) rcSeq );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ free ( ( void * ) lenBuffer );
+ free ( ( void * ) indexArray );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) read_name[i] );
+ }
+
+ free ( ( void * ) read_name );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) smallerBuffer );
+ free ( ( void * ) hashBanBuffer );
+ free ( ( void * ) nodeBuffer );
+ free ( ( void * ) ctgIdArray );
+ free ( ( void * ) posArray );
+ free ( ( void * ) orienArray );
+ free ( ( void * ) footprint );
+ free ( ( void * ) insSizeArray );
+ free ( ( void * ) src_name );
+ free ( ( void * ) next_name );
+
+ if ( gLineLen < maxReadLen )
+ {
+ free ( ( void * ) gStr );
+ gStr = NULL;
+ }
+
+ if ( contig_array )
+ {
+ free ( ( void * ) contig_array );
+ contig_array = NULL;
+ }
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
@@ -1063,218 +1077,222 @@ Output:
Return:
None.
*************************************************/
-void prlLongRead2Ctg ( char * libfile, char * outfile )
+void prlLongRead2Ctg ( char *libfile, char *outfile )
{
- long long i;
- char * src_name, *next_name, name[256];
- FILE * outfp1, *outfp2, *outfp3;
- int maxReadNum, libNo, prevLibNo;
- boolean flag, pairs = 0;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- maxReadLen = 0;
- maxNameLen = 256;
- scan_libInfo ( libfile );
-
- if ( !maxReadLen )
- {
- maxReadLen = 100;
- }
-
- int longReadLen = getMaxLongReadLen ( num_libs );
-
- if ( longReadLen < 1 ) // no long reads
- {
- return;
- }
-
- maxReadLen4all = maxReadLen < longReadLen ? longReadLen : maxReadLen;
- fprintf ( stderr, "In file: %s, long read len %d, max name len %d.\n", libfile, longReadLen, maxNameLen );
- maxReadLen = longReadLen;
- src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
- nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; //make sure paired reads are processed at the same batch
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- read_name = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- read_name[i] = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- }
-
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
- ctgIdArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
- posArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
- orienArray = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
- footprint = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
- insSizeArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
-
- if ( gLineLen < maxReadLen )
- {
- gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
- }
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
- deletion = ( int * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( int ) );
- thrdSignal[0] = 0;
- deletion[0] = 0;
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- deletion[i + 1] = 0;
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- }
-
- if ( !contig_array )
- {
- basicContigInfo ( outfile );
- }
-
- sprintf ( name, "%s.longReadInGap", outfile );
- outfp1 = ckopen ( name, "wb" );
-
- if ( fill )
- {
- sprintf ( name, "%s.RlongReadInGap", outfile );
- outfp2 = ckopen ( name, "w" );
- }
-
- readCounter = 0;
- kmer_c = n_solexa = read_c = i = libNo = 0;
- prevLibNo = -1;
- int type = 0; //decide whether the PE reads is good or bad
-
- while ( ( flag = read1seqInLib ( seqBuffer[read_c], read_name[read_c], & ( lenBuffer[read_c] ), &libNo, pairs, 4, &type ) ) != 0 )
- {
- if ( type == -1 ) //if the reads is bad, go back.
- {
- i--;
-
- if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
- {
- kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
- }
-
- read_c--;
- n_solexa -= 2;
- continue;
- }
-
- if ( libNo != prevLibNo )
- {
- prevLibNo = libNo;
- ALIGNLEN = lib_array[libNo].map_len;
- ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
- fprintf ( stderr, "Map_len %d.\n", ALIGNLEN );
- }
-
- insSizeArray[read_c] = 18;
-
- if ( ( ++i ) % 100000000 == 0 )
- {
- fprintf ( stderr, "--- %lldth reads.\n", i );
- }
-
- indexArray[read_c] = kmer_c;
-
- if ( lenBuffer[read_c] >= overlaplen + 1 )
- {
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- }
-
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- recordLongRead ( outfp1, outfp2 );
- kmer_c = 0;
- read_c = 0;
- }
- }
-
- if ( read_c )
- {
- indexArray[read_c] = kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- recordLongRead ( outfp1, outfp2 );
- fprintf ( stderr, "Output %lld out of %lld (%.1f)%% reads in gaps.\n", readsInGap, readCounter, ( float ) readsInGap / readCounter * 100 );
- }
-
- sendWorkSignal ( 5, thrdSignal ); //stop
- thread_wait ( threads );
- fclose ( outfp1 );
-
- if ( fill )
- { fclose ( outfp2 ); }
-
- free_libs ();
-
- if ( 1 ) // multi-threads
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- deletion[0] += deletion[i + 1];
- free ( ( void * ) rcSeq[i + 1] );
- }
- }
-
- fprintf ( stderr, "%d reads deleted.\n", deletion[0] );
- free ( ( void * ) rcSeq );
- free ( ( void * ) deletion );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- free ( ( void * ) seqBuffer[i] );
- }
-
- for ( i = 0; i < maxReadNum; i++ )
- { free ( ( void * ) read_name[i] ); }
-
- free ( ( void * ) seqBuffer );
- free ( ( void * ) lenBuffer );
- free ( ( void * ) indexArray );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) smallerBuffer );
- free ( ( void * ) hashBanBuffer );
- free ( ( void * ) nodeBuffer );
- free ( ( void * ) ctgIdArray );
- free ( ( void * ) posArray );
- free ( ( void * ) orienArray );
- free ( ( void * ) footprint );
- free ( ( void * ) insSizeArray );
- free ( ( void * ) src_name );
- free ( ( void * ) next_name );
-
- if ( gLineLen < maxReadLen )
- {
- free ( ( void * ) gStr );
- gStr = NULL;
- }
+ long long i;
+ char *src_name, *next_name, name[256];
+ FILE *outfp1, *outfp2, *outfp3;
+ int maxReadNum, libNo, prevLibNo;
+ boolean flag, pairs = 0;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ maxReadLen = 0;
+ maxNameLen = 256;
+ scan_libInfo ( libfile );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ int longReadLen = getMaxLongReadLen ( num_libs );
+
+ if ( longReadLen < 1 ) // no long reads
+ {
+ return;
+ }
+
+ maxReadLen4all = maxReadLen < longReadLen ? longReadLen : maxReadLen;
+ fprintf ( stderr, "In file: %s, long read len %d, max name len %d.\n", libfile, longReadLen, maxNameLen );
+ maxReadLen = longReadLen;
+ src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
+ nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
+ maxReadNum = maxReadNum % 2 == 0 ? maxReadNum : maxReadNum - 1; //make sure paired reads are processed at the same batch
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ read_name = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ read_name[i] = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ }
+
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
+ ctgIdArray = ( unsigned int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( unsigned int ) );
+ posArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+ orienArray = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
+ footprint = ( char * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( char ) );
+ insSizeArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+
+ if ( gLineLen < maxReadLen )
+ {
+ gStr = ( char * ) ckalloc ( ( maxReadLen + 1 ) * sizeof ( char ) );
+ }
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+ deletion = ( int * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( int ) );
+ thrdSignal[0] = 0;
+ deletion[0] = 0;
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ deletion[i + 1] = 0;
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ if ( !contig_array )
+ {
+ basicContigInfo ( outfile );
+ }
+
+ sprintf ( name, "%s.longReadInGap", outfile );
+ outfp1 = ckopen ( name, "wb" );
+
+ if ( fill )
+ {
+ sprintf ( name, "%s.RlongReadInGap", outfile );
+ outfp2 = ckopen ( name, "w" );
+ }
+
+ readCounter = 0;
+ kmer_c = n_solexa = read_c = i = libNo = 0;
+ prevLibNo = -1;
+ int type = 0; //decide whether the PE reads is good or bad
+
+ while ( ( flag = read1seqInLib ( seqBuffer[read_c], read_name[read_c], & ( lenBuffer[read_c] ), &libNo, pairs, 4, &type ) ) != 0 )
+ {
+ if ( type == -1 ) //if the reads is bad, go back.
+ {
+ i--;
+
+ if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
+ {
+ kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
+ }
+
+ read_c--;
+ n_solexa -= 2;
+ continue;
+ }
+
+ if ( libNo != prevLibNo )
+ {
+ prevLibNo = libNo;
+ ALIGNLEN = lib_array[libNo].map_len;
+ ALIGNLEN = ALIGNLEN < 35 ? 35 : ALIGNLEN;
+ fprintf ( stderr, "Map_len %d.\n", ALIGNLEN );
+ }
+
+ insSizeArray[read_c] = 18;
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ indexArray[read_c] = kmer_c;
+
+ if ( lenBuffer[read_c] >= overlaplen + 1 )
+ {
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ }
+
+ read_c++;
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ recordLongRead ( outfp1, outfp2 );
+ kmer_c = 0;
+ read_c = 0;
+ }
+ }
+
+ if ( read_c )
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ recordLongRead ( outfp1, outfp2 );
+ fprintf ( stderr, "Output %lld out of %lld (%.1f)%% reads in gaps.\n", readsInGap, readCounter, ( float ) readsInGap / readCounter * 100 );
+ }
+
+ sendWorkSignal ( 5, thrdSignal ); //stop
+ thread_wait ( threads );
+ fclose ( outfp1 );
+
+ if ( fill )
+ {
+ fclose ( outfp2 );
+ }
+
+ free_libs ();
+
+ if ( 1 ) // multi-threads
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ deletion[0] += deletion[i + 1];
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+ }
+
+ fprintf ( stderr, "%d reads deleted.\n", deletion[0] );
+ free ( ( void * ) rcSeq );
+ free ( ( void * ) deletion );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) read_name[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ free ( ( void * ) lenBuffer );
+ free ( ( void * ) indexArray );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) smallerBuffer );
+ free ( ( void * ) hashBanBuffer );
+ free ( ( void * ) nodeBuffer );
+ free ( ( void * ) ctgIdArray );
+ free ( ( void * ) posArray );
+ free ( ( void * ) orienArray );
+ free ( ( void * ) footprint );
+ free ( ( void * ) insSizeArray );
+ free ( ( void * ) src_name );
+ free ( ( void * ) next_name );
+
+ if ( gLineLen < maxReadLen )
+ {
+ free ( ( void * ) gStr );
+ gStr = NULL;
+ }
}
diff --git a/standardPregraph/prlRead2path.c b/standardPregraph/prlRead2path.c
index f584e2a..cab039b 100644
--- a/standardPregraph/prlRead2path.c
+++ b/standardPregraph/prlRead2path.c
@@ -1,7 +1,7 @@
/*
* prlRead2path.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -33,513 +33,513 @@ static const Kmer kmerZero = { 0, 0, 0, 0 };
static const Kmer kmerZero = { 0, 0 };
#endif
-static unsigned int * arcCounters;
+static unsigned int *arcCounters;
static int buffer_size = 100000000;
static long long markCounter = 0;
-static unsigned int * fwriteBuf;
-static unsigned char * markerOnEdge; //edge occured times for each edge
+static unsigned int *fwriteBuf;
+static unsigned char *markerOnEdge; //edge occured times for each edge
//buffer related varibles for chop kmer
static int read_c;
-static char ** rcSeq;
-static char ** seqBuffer;
-static int * lenBuffer;
+static char **rcSeq;
+static char **seqBuffer;
+static int *lenBuffer;
//edge and (K+1)mer related variables
-static preARC ** preArc_array;
-static Kmer * mixBuffer; //kmer buffer; after searching, mixBuffer[j].low = node->l_links; 'node->l_links'
-static boolean * flagArray; //indicate each item in mixBuffer whether it's a (K+1)mer
+static preARC **preArc_array;
+static Kmer *mixBuffer; //kmer buffer; after searching, mixBuffer[j].low = node->l_links; 'node->l_links'
+static boolean *flagArray; //indicate each item in mixBuffer whether it's a (K+1)mer
// kmer related variables
-static char ** flags;
+static char **flags;
static int kmer_c;
-static Kmer * kmerBuffer;
-static ubyte8 * hashBanBuffer;
-static kmer_t ** nodeBuffer; //kmer_t buffer related to 'kmerBuffer'
-static boolean * smallerBuffer;
-static int * indexArray;
+static Kmer *kmerBuffer;
+static ubyte8 *hashBanBuffer;
+static kmer_t **nodeBuffer; //kmer_t buffer related to 'kmerBuffer'
+static boolean *smallerBuffer;
+static int *indexArray;
-static int * deletion; //read deletion number for each thread
+static int *deletion; //read deletion number for each thread
static struct aiocb aio1;
static struct aiocb aio2;
-static char * aioBuffer1;
-static char * aioBuffer2;
-static char * readBuffer1;
-static char * readBuffer2;
+static char *aioBuffer1;
+static char *aioBuffer2;
+static char *readBuffer1;
+static char *readBuffer2;
static void parse1read ( int t, int threadID );
static void search1kmerPlus ( int j, unsigned char thrdID );
-static void threadRoutine ( void * thrdID );
-static void searchKmer ( int t, KmerSet * kset );
+static void threadRoutine ( void *thrdID );
+static void searchKmer ( int t, KmerSet *kset );
static void chopKmer4read ( int t, int threadID );
-static void thread_wait ( pthread_t * threads );
+static void thread_wait ( pthread_t *threads );
static void thread_add1preArc ( unsigned int from_ed, unsigned int to_ed, unsigned int thrdID );
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i, t, j, start, finish;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- //printf("%dth thread with task %d, hash_table %p\n",id,prm.task,prm.hash_table);
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- for ( i = 0; i < kmer_c; i++ )
- {
- //if((hashBanBuffer[i]&taskMask)!=prm.threadID)
- if ( ( hashBanBuffer[i] % thrd_num ) != id )
- {
- continue;
- }
-
- searchKmer ( i, KmerSets[id] );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 2 )
- {
- for ( i = 0; i < read_c; i++ )
- {
- if ( i % thrd_num != id )
- {
- continue;
- }
-
- chopKmer4read ( i, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 3 )
- {
- // parse reads
- for ( t = 0; t < read_c; t++ )
- {
- if ( t % thrd_num != id )
- {
- continue;
- }
-
- parse1read ( t, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 4 )
- {
- //printf("thread %d, reads %d splay kmerplus\n",id,read_c);
- for ( t = 0; t < read_c; t++ )
- {
- start = indexArray[t];
- finish = indexArray[t + 1];
+ PARAMETER *prm;
+ int i, t, j, start, finish;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ //printf("%dth thread with task %d, hash_table %p\n",id,prm.task,prm.hash_table);
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ for ( i = 0; i < kmer_c; i++ )
+ {
+ //if((hashBanBuffer[i]&taskMask)!=prm.threadID)
+ if ( ( hashBanBuffer[i] % thrd_num ) != id )
+ {
+ continue;
+ }
+
+ searchKmer ( i, KmerSets[id] );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 2 )
+ {
+ for ( i = 0; i < read_c; i++ )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ chopKmer4read ( i, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 3 )
+ {
+ // parse reads
+ for ( t = 0; t < read_c; t++ )
+ {
+ if ( t % thrd_num != id )
+ {
+ continue;
+ }
+
+ parse1read ( t, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 4 )
+ {
+ //printf("thread %d, reads %d splay kmerplus\n",id,read_c);
+ for ( t = 0; t < read_c; t++ )
+ {
+ start = indexArray[t];
+ finish = indexArray[t + 1];
#ifdef MER127
- for ( j = start; j < finish; j++ )
- {
- if ( flagArray[j] == 0 )
- {
- if ( mixBuffer[j].low2 == 0 )
- {
- break;
- }
- }
- else if ( hashBanBuffer[j] % thrd_num == id )
- {
- //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
- search1kmerPlus ( j, id );
- }
-
- /*
- if(flagArray[j]==0&&mixBuffer[j]==0)
- break;
- if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
- continue;
- search1kmerPlus(j,id);
- */
- }
+ for ( j = start; j < finish; j++ )
+ {
+ if ( flagArray[j] == 0 )
+ {
+ if ( mixBuffer[j].low2 == 0 )
+ {
+ break;
+ }
+ }
+ else if ( hashBanBuffer[j] % thrd_num == id )
+ {
+ //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
+ search1kmerPlus ( j, id );
+ }
+
+ /*
+ if(flagArray[j]==0&&mixBuffer[j]==0)
+ break;
+ if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
+ continue;
+ search1kmerPlus(j,id);
+ */
+ }
#else
- for ( j = start; j < finish; j++ )
- {
- if ( flagArray[j] == 0 )
- {
- if ( mixBuffer[j].low == 0 )
- {
- break;
- }
- }
- else if ( hashBanBuffer[j] % thrd_num == id )
- {
- //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
- search1kmerPlus ( j, id );
- }
-
- /*
- if(flagArray[j]==0&&mixBuffer[j]==0)
- break;
- if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
- continue;
- search1kmerPlus(j,id);
- */
- }
+ for ( j = start; j < finish; j++ )
+ {
+ if ( flagArray[j] == 0 )
+ {
+ if ( mixBuffer[j].low == 0 )
+ {
+ break;
+ }
+ }
+ else if ( hashBanBuffer[j] % thrd_num == id )
+ {
+ //fprintf(stderr,"thread %d search for ban %lld\n",id,hashBanBuffer[j]);
+ search1kmerPlus ( j, id );
+ }
+
+ /*
+ if(flagArray[j]==0&&mixBuffer[j]==0)
+ break;
+ if(!flagArray[j]||(hashBanBuffer[j]%thrd_num)!=id)
+ continue;
+ search1kmerPlus(j,id);
+ */
+ }
#endif
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 6 )
- {
- for ( t = 0; t < read_c; t++ )
- {
- start = indexArray[t];
- finish = indexArray[t + 1];
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 6 )
+ {
+ for ( t = 0; t < read_c; t++ )
+ {
+ start = indexArray[t];
+ finish = indexArray[t + 1];
#ifdef MER127
- for ( j = start; j < finish - 1; j++ )
- {
- if ( mixBuffer[j].low2 == 0 || mixBuffer[j + 1].low2 == 0 )
- {
- break;
- }
+ for ( j = start; j < finish - 1; j++ )
+ {
+ if ( mixBuffer[j].low2 == 0 || mixBuffer[j + 1].low2 == 0 )
+ {
+ break;
+ }
- if ( mixBuffer[j].low2 % thrd_num != id )
- {
- continue;
- }
+ if ( mixBuffer[j].low2 % thrd_num != id )
+ {
+ continue;
+ }
- thread_add1preArc ( mixBuffer[j].low2, mixBuffer[j + 1].low2, id );
- }
+ thread_add1preArc ( mixBuffer[j].low2, mixBuffer[j + 1].low2, id );
+ }
#else
- for ( j = start; j < finish - 1; j++ )
- {
- if ( mixBuffer[j].low == 0 || mixBuffer[j + 1].low == 0 )
- {
- break;
- }
+ for ( j = start; j < finish - 1; j++ )
+ {
+ if ( mixBuffer[j].low == 0 || mixBuffer[j + 1].low == 0 )
+ {
+ break;
+ }
- if ( mixBuffer[j].low % thrd_num != id )
- {
- continue;
- }
+ if ( mixBuffer[j].low % thrd_num != id )
+ {
+ continue;
+ }
- thread_add1preArc ( mixBuffer[j].low, mixBuffer[j + 1].low, id );
- }
+ thread_add1preArc ( mixBuffer[j].low, mixBuffer[j + 1].low, id );
+ }
#endif
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 5 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
-
- usleep ( 1 );
- }
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 5 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+
+ usleep ( 1 );
+ }
}
static void chopKmer4read ( int t, int threadID )
{
- char * src_seq = seqBuffer[t];
- char * bal_seq = rcSeq[threadID];
- int len_seq = lenBuffer[t];
- int j, bal_j;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- int index;
+ char *src_seq = seqBuffer[t];
+ char *bal_seq = rcSeq[threadID];
+ int len_seq = lenBuffer[t];
+ int j, bal_j;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ int index;
#ifdef MER127
- word = kmerZero;
+ word = kmerZero;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word = kmerZero;
+ word = kmerZero;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlaplen );
- bal_j = len_seq - 0 - overlaplen; // 0;
- index = indexArray[t];
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- hashBanBuffer[index++] = hash_ban;
- }
- else
- {
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- hashBanBuffer[index++] = bal_hash_ban;
- }
-
- //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban);
- for ( j = 1; j <= len_seq - overlaplen; j++ )
- {
- word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
- bal_j = len_seq - j - overlaplen; // j;
- bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- hash_ban = hash_kmer ( word );
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- hashBanBuffer[index++] = hash_ban;
- //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
- }
- else
- {
- // complementary node
- bal_hash_ban = hash_kmer ( bal_word );
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- hashBanBuffer[index++] = bal_hash_ban;
- //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
- }
- }
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlaplen );
+ bal_j = len_seq - 0 - overlaplen; // 0;
+ index = indexArray[t];
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ hashBanBuffer[index++] = hash_ban;
+ }
+ else
+ {
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ hashBanBuffer[index++] = bal_hash_ban;
+ }
+
+ //printf("%dth: %p with %p\n",kmer_c-1,bal_word,bal_hash_ban);
+ for ( j = 1; j <= len_seq - overlaplen; j++ )
+ {
+ word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
+ bal_j = len_seq - j - overlaplen; // j;
+ bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ hash_ban = hash_kmer ( word );
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ hashBanBuffer[index++] = hash_ban;
+ //printf("%dth: %p with %p\n",kmer_c-1,word,hashBanBuffer[kmer_c-1]);
+ }
+ else
+ {
+ // complementary node
+ bal_hash_ban = hash_kmer ( bal_word );
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ hashBanBuffer[index++] = bal_hash_ban;
+ //printf("%dth: %p with %p\n",kmer_c-1,bal_word,hashBanBuffer[kmer_c-1]);
+ }
+ }
}
//splay for one kmer in buffer and save the node to nodeBuffer
-static void searchKmer ( int t, KmerSet * kset )
+static void searchKmer ( int t, KmerSet *kset )
{
- kmer_t * node;
- boolean found = search_kmerset ( kset, kmerBuffer[t], &node );
-
- if ( !found )
- {
- fprintf ( stderr, "SearchKmer: kmer " );
- PrintKmer ( stderr, kmerBuffer[t] );
- fprintf ( stderr, " is not found.\n" );
- /*
- #ifdef MER127
- fprintf (stderr,"searchKmer: kmer %llx %llx %llx %llx is not found\n", kmerBuffer[t].high1, kmerBuffer[t].low1, kmerBuffer[t].high2, kmerBuffer[t].low2);
- #else
- fprintf (stderr,"searchKmer: kmer %llx %llx is not found\n", kmerBuffer[t].high, kmerBuffer[t].low);
- #endif
- */
- }
-
- nodeBuffer[t] = node;
+ kmer_t *node;
+ boolean found = search_kmerset ( kset, kmerBuffer[t], &node );
+
+ if ( !found )
+ {
+ fprintf ( stderr, "SearchKmer: kmer " );
+ PrintKmer ( stderr, kmerBuffer[t] );
+ fprintf ( stderr, " is not found.\n" );
+ /*
+ #ifdef MER127
+ fprintf (stderr,"searchKmer: kmer %llx %llx %llx %llx is not found\n", kmerBuffer[t].high1, kmerBuffer[t].low1, kmerBuffer[t].high2, kmerBuffer[t].low2);
+ #else
+ fprintf (stderr,"searchKmer: kmer %llx %llx is not found\n", kmerBuffer[t].high, kmerBuffer[t].low);
+ #endif
+ */
+ }
+
+ nodeBuffer[t] = node;
}
-static preARC * getPreArcBetween ( unsigned int from_ed, unsigned int to_ed )
+static preARC *getPreArcBetween ( unsigned int from_ed, unsigned int to_ed )
{
- preARC * parc;
- parc = preArc_array[from_ed];
+ preARC *parc;
+ parc = preArc_array[from_ed];
- while ( parc )
- {
- if ( parc->to_ed == to_ed )
- {
- return parc;
- }
+ while ( parc )
+ {
+ if ( parc->to_ed == to_ed )
+ {
+ return parc;
+ }
- parc = parc->next;
- }
+ parc = parc->next;
+ }
- return parc;
+ return parc;
}
static void thread_add1preArc ( unsigned int from_ed, unsigned int to_ed, unsigned int thrdID )
{
- preARC * parc = getPreArcBetween ( from_ed, to_ed );
-
- if ( parc )
- {
- parc->multiplicity++;
- }
- else
- {
- parc = prlAllocatePreArc ( to_ed, preArc_mem_managers[thrdID] );
- arcCounters[thrdID]++;
- parc->next = preArc_array[from_ed];
- preArc_array[from_ed] = parc;
- }
+ preARC *parc = getPreArcBetween ( from_ed, to_ed );
+
+ if ( parc )
+ {
+ parc->multiplicity++;
+ }
+ else
+ {
+ parc = prlAllocatePreArc ( to_ed, preArc_mem_managers[thrdID] );
+ arcCounters[thrdID]++;
+ parc->next = preArc_array[from_ed];
+ preArc_array[from_ed] = parc;
+ }
}
static void memoAlloc4preArc ()
{
- unsigned int i;
- preArc_array = ( preARC ** ) ckalloc ( ( num_ed + 1 ) * sizeof ( preARC * ) );
+ unsigned int i;
+ preArc_array = ( preARC ** ) ckalloc ( ( num_ed + 1 ) * sizeof ( preARC * ) );
- for ( i = 0; i <= num_ed; i++ )
- {
- preArc_array[i] = NULL;
- }
+ for ( i = 0; i <= num_ed; i++ )
+ {
+ preArc_array[i] = NULL;
+ }
}
static void memoFree4preArc ()
{
- prlDestroyPreArcMem ();
+ prlDestroyPreArcMem ();
- if ( preArc_array )
- {
- free ( ( void * ) preArc_array );
- }
+ if ( preArc_array )
+ {
+ free ( ( void * ) preArc_array );
+ }
}
-static void output_arcs ( char * outfile )
+static void output_arcs ( char *outfile )
{
- unsigned int i;
- char name[256];
- FILE * outfp, *outfp2 = NULL;
- preARC * parc;
- sprintf ( name, "%s.preArc", outfile );
- outfp = ckopen ( name, "w" );
-
- if ( repsTie )
- {
- sprintf ( name, "%s.markOnEdge", outfile );
- outfp2 = ckopen ( name, "w" );
- }
-
- markCounter = 0;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- if ( repsTie )
- {
- markCounter += markerOnEdge[i];
- fprintf ( outfp2, "%d\n", markerOnEdge[i] );
- }
-
- parc = preArc_array[i];
-
- if ( !parc )
- {
- continue;
- }
-
- fprintf ( outfp, "%u", i );
-
- while ( parc )
- {
- fprintf ( outfp, " %u %u", parc->to_ed, parc->multiplicity );
- parc = parc->next;
- }
-
- fprintf ( outfp, "\n" );
- }
-
- fclose ( outfp );
-
- if ( repsTie )
- {
- fclose ( outfp2 );
- // fprintf (stderr,"%lld marker(s) counted.\n", markCounter);
- }
+ unsigned int i;
+ char name[256];
+ FILE *outfp, *outfp2 = NULL;
+ preARC *parc;
+ sprintf ( name, "%s.preArc", outfile );
+ outfp = ckopen ( name, "w" );
+
+ if ( repsTie )
+ {
+ sprintf ( name, "%s.markOnEdge", outfile );
+ outfp2 = ckopen ( name, "w" );
+ }
+
+ markCounter = 0;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ if ( repsTie )
+ {
+ markCounter += markerOnEdge[i];
+ fprintf ( outfp2, "%d\n", markerOnEdge[i] );
+ }
+
+ parc = preArc_array[i];
+
+ if ( !parc )
+ {
+ continue;
+ }
+
+ fprintf ( outfp, "%u", i );
+
+ while ( parc )
+ {
+ fprintf ( outfp, " %u %u", parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+ }
+
+ fprintf ( outfp, "\n" );
+ }
+
+ fclose ( outfp );
+
+ if ( repsTie )
+ {
+ fclose ( outfp2 );
+ // fprintf (stderr,"%lld marker(s) counted.\n", markCounter);
+ }
}
-static void recordPathBin ( FILE * outfp )
+static void recordPathBin ( FILE *outfp )
{
- int t, j, start, finish;
- unsigned char counter;
+ int t, j, start, finish;
+ unsigned char counter;
- for ( t = 0; t < read_c; t++ )
- {
- start = indexArray[t];
- finish = indexArray[t + 1];
+ for ( t = 0; t < read_c; t++ )
+ {
+ start = indexArray[t];
+ finish = indexArray[t + 1];
#ifdef MER127
- if ( finish - start < 3 || mixBuffer[start].low2 == 0 || mixBuffer[start + 1].low2 == 0 || mixBuffer[start + 2].low2 == 0 )
- {
- continue;
- }
+ if ( finish - start < 3 || mixBuffer[start].low2 == 0 || mixBuffer[start + 1].low2 == 0 || mixBuffer[start + 2].low2 == 0 )
+ {
+ continue;
+ }
- counter = 0;
+ counter = 0;
- for ( j = start; j < finish; j++ )
- {
- if ( mixBuffer[j].low2 == 0 )
- {
- break;
- }
+ for ( j = start; j < finish; j++ )
+ {
+ if ( mixBuffer[j].low2 == 0 )
+ {
+ break;
+ }
- fwriteBuf[counter++] = ( unsigned int ) mixBuffer[j].low2;
+ fwriteBuf[counter++] = ( unsigned int ) mixBuffer[j].low2;
- if ( markerOnEdge[mixBuffer[j].low2] < 255 )
- {
- markerOnEdge[mixBuffer[j].low2]++;
- }
+ if ( markerOnEdge[mixBuffer[j].low2] < 255 )
+ {
+ markerOnEdge[mixBuffer[j].low2]++;
+ }
- markCounter++;
- }
+ markCounter++;
+ }
#else
- if ( finish - start < 3 || mixBuffer[start].low == 0 || mixBuffer[start + 1].low == 0 || mixBuffer[start + 2].low == 0 )
- {
- continue;
- }
+ if ( finish - start < 3 || mixBuffer[start].low == 0 || mixBuffer[start + 1].low == 0 || mixBuffer[start + 2].low == 0 )
+ {
+ continue;
+ }
- counter = 0;
+ counter = 0;
- for ( j = start; j < finish; j++ )
- {
- if ( mixBuffer[j].low == 0 )
- {
- break;
- }
+ for ( j = start; j < finish; j++ )
+ {
+ if ( mixBuffer[j].low == 0 )
+ {
+ break;
+ }
- fwriteBuf[counter++] = ( unsigned int ) mixBuffer[j].low;
+ fwriteBuf[counter++] = ( unsigned int ) mixBuffer[j].low;
- if ( markerOnEdge[mixBuffer[j].low] < 255 )
- {
- markerOnEdge[mixBuffer[j].low]++;
- }
+ if ( markerOnEdge[mixBuffer[j].low] < 255 )
+ {
+ markerOnEdge[mixBuffer[j].low]++;
+ }
- markCounter++;
- }
+ markCounter++;
+ }
#endif
- fwrite ( &counter, sizeof ( char ), 1, outfp );
- fwrite ( fwriteBuf, sizeof ( unsigned int ), ( int ) counter, outfp );
- }
+ fwrite ( &counter, sizeof ( char ), 1, outfp );
+ fwrite ( fwriteBuf, sizeof ( unsigned int ), ( int ) counter, outfp );
+ }
}
/*************************************************
@@ -557,217 +557,217 @@ Return:
*************************************************/
static void search1kmerPlus ( int j, unsigned char thrdID )
{
- kmer_t * node;
- boolean found = search_kmerset ( KmerSetsPatch[thrdID], mixBuffer[j], &node );
-
- if ( !found )
- {
- /*
- fprintf(stderr,"kmerPlus %llx %llx (hashban %lld) not found, flag %d!\n",
- mixBuffer[j].high,mixBuffer[j].low,hashBanBuffer[j],flagArray[j]);
- */
- mixBuffer[j] = kmerZero;
- return;
- } //else fprintf(stderr,"kmerPlus found\n");
+ kmer_t *node;
+ boolean found = search_kmerset ( KmerSetsPatch[thrdID], mixBuffer[j], &node );
+
+ if ( !found )
+ {
+ /*
+ fprintf(stderr,"kmerPlus %llx %llx (hashban %lld) not found, flag %d!\n",
+ mixBuffer[j].high,mixBuffer[j].low,hashBanBuffer[j],flagArray[j]);
+ */
+ mixBuffer[j] = kmerZero;
+ return;
+ } //else fprintf(stderr,"kmerPlus found\n");
#ifdef MER127
- if ( smallerBuffer[j] )
- {
- mixBuffer[j].low2 = node->l_links;
- }
- else
- {
- mixBuffer[j].low2 = node->l_links + node->twin - 1;
- }
+ if ( smallerBuffer[j] )
+ {
+ mixBuffer[j].low2 = node->l_links;
+ }
+ else
+ {
+ mixBuffer[j].low2 = node->l_links + node->twin - 1;
+ }
#else
- if ( smallerBuffer[j] )
- {
- mixBuffer[j].low = node->l_links;
- }
- else
- {
- mixBuffer[j].low = node->l_links + node->twin - 1;
- }
+ if ( smallerBuffer[j] )
+ {
+ mixBuffer[j].low = node->l_links;
+ }
+ else
+ {
+ mixBuffer[j].low = node->l_links + node->twin - 1;
+ }
#endif
}
static void parse1read ( int t, int threadID )
{
- unsigned int j, retain = 0;
- unsigned int edge_index = 0;
- kmer_t * node;
- boolean isSmaller;
- Kmer wordplus, bal_wordplus;
- unsigned int start, finish, pos;
- Kmer prevKmer, currentKmer;
- boolean IsPrevKmer = 0;
- start = indexArray[t];
- finish = indexArray[t + 1];
- pos = start;
-
- for ( j = start; j < finish; j++ )
- {
- node = nodeBuffer[j];
-
- //extract edges or keep kmers
- if ( ( node->deleted ) || ( node->linear && !node->inEdge ) ) // deleted or in a floating loop
- {
- if ( retain < 2 )
- {
- retain = 0;
- pos = start;
- }
- else
- {
- break;
- }
-
- continue;
- }
-
- isSmaller = smallerBuffer[j];
-
- if ( node->linear )
- {
- if ( isSmaller )
- {
- edge_index = node->l_links;
- }
- else
- {
- edge_index = node->l_links + node->twin - 1;
- }
+ unsigned int j, retain = 0;
+ unsigned int edge_index = 0;
+ kmer_t *node;
+ boolean isSmaller;
+ Kmer wordplus, bal_wordplus;
+ unsigned int start, finish, pos;
+ Kmer prevKmer, currentKmer;
+ boolean IsPrevKmer = 0;
+ start = indexArray[t];
+ finish = indexArray[t + 1];
+ pos = start;
+
+ for ( j = start; j < finish; j++ )
+ {
+ node = nodeBuffer[j];
+
+ //extract edges or keep kmers
+ if ( ( node->deleted ) || ( node->linear && !node->inEdge ) ) // deleted or in a floating loop
+ {
+ if ( retain < 2 )
+ {
+ retain = 0;
+ pos = start;
+ }
+ else
+ {
+ break;
+ }
+
+ continue;
+ }
+
+ isSmaller = smallerBuffer[j];
+
+ if ( node->linear )
+ {
+ if ( isSmaller )
+ {
+ edge_index = node->l_links;
+ }
+ else
+ {
+ edge_index = node->l_links + node->twin - 1;
+ }
#ifdef MER127
- if ( retain == 0 || IsPrevKmer )
- {
- retain++;
- mixBuffer[pos].low2 = edge_index;
- flagArray[pos++] = 0;
- IsPrevKmer = 0;
- }
- else if ( edge_index != mixBuffer[pos - 1].low2 )
- {
- retain++;
- mixBuffer[pos].low2 = edge_index;
- flagArray[pos++] = 0;
- }
+ if ( retain == 0 || IsPrevKmer )
+ {
+ retain++;
+ mixBuffer[pos].low2 = edge_index;
+ flagArray[pos++] = 0;
+ IsPrevKmer = 0;
+ }
+ else if ( edge_index != mixBuffer[pos - 1].low2 )
+ {
+ retain++;
+ mixBuffer[pos].low2 = edge_index;
+ flagArray[pos++] = 0;
+ }
#else
- if ( retain == 0 || IsPrevKmer )
- {
- retain++;
- mixBuffer[pos].low = edge_index;
- flagArray[pos++] = 0;
- IsPrevKmer = 0;
- }
- else if ( edge_index != mixBuffer[pos - 1].low )
- {
- retain++;
- mixBuffer[pos].low = edge_index;
- flagArray[pos++] = 0;
- }
+ if ( retain == 0 || IsPrevKmer )
+ {
+ retain++;
+ mixBuffer[pos].low = edge_index;
+ flagArray[pos++] = 0;
+ IsPrevKmer = 0;
+ }
+ else if ( edge_index != mixBuffer[pos - 1].low )
+ {
+ retain++;
+ mixBuffer[pos].low = edge_index;
+ flagArray[pos++] = 0;
+ }
#endif
- }
- else
- {
- if ( isSmaller )
- {
- currentKmer = node->seq;
- }
- else
- {
- currentKmer = reverseComplement ( node->seq, overlaplen );
- }
-
- if ( IsPrevKmer )
- {
- retain++;
- wordplus = KmerPlus ( prevKmer, lastCharInKmer ( currentKmer ) );
- bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
-
- if ( KmerSmaller ( wordplus, bal_wordplus ) )
- {
- smallerBuffer[pos] = 1;
- hashBanBuffer[pos] = hash_kmer ( wordplus );
- mixBuffer[pos] = wordplus;
- }
- else
- {
- smallerBuffer[pos] = 0;
- hashBanBuffer[pos] = hash_kmer ( bal_wordplus );
- mixBuffer[pos] = bal_wordplus;
- }
-
- // fprintf(stderr,"%lld\n",hashBanBuffer[pos]);
- flagArray[pos++] = 1;
- }
-
- IsPrevKmer = 1;
- prevKmer = currentKmer;
- }
- }
-
- /*
- for(j=start;j<pos;j++)
- fprintf(stderr,"%d ",flagArray[j]);
- fprintf(stderr,"\n");
- */
- if ( retain < 1 )
- {
- deletion[threadID]++;
- }
-
- if ( retain < 2 )
- {
- flagArray[start] = 0;
- mixBuffer[start] = kmerZero;
- return;
- }
-
- if ( ( pos - start ) != retain )
- {
- fprintf ( stderr, "Read %d, %d vs %d.\n", t, retain, edge_index - start );
- }
-
- if ( pos < finish )
- {
- flagArray[pos] = 0;
- mixBuffer[pos] = kmerZero;
- }
+ }
+ else
+ {
+ if ( isSmaller )
+ {
+ currentKmer = node->seq;
+ }
+ else
+ {
+ currentKmer = reverseComplement ( node->seq, overlaplen );
+ }
+
+ if ( IsPrevKmer )
+ {
+ retain++;
+ wordplus = KmerPlus ( prevKmer, lastCharInKmer ( currentKmer ) );
+ bal_wordplus = reverseComplement ( wordplus, overlaplen + 1 );
+
+ if ( KmerSmaller ( wordplus, bal_wordplus ) )
+ {
+ smallerBuffer[pos] = 1;
+ hashBanBuffer[pos] = hash_kmer ( wordplus );
+ mixBuffer[pos] = wordplus;
+ }
+ else
+ {
+ smallerBuffer[pos] = 0;
+ hashBanBuffer[pos] = hash_kmer ( bal_wordplus );
+ mixBuffer[pos] = bal_wordplus;
+ }
+
+ // fprintf(stderr,"%lld\n",hashBanBuffer[pos]);
+ flagArray[pos++] = 1;
+ }
+
+ IsPrevKmer = 1;
+ prevKmer = currentKmer;
+ }
+ }
+
+ /*
+ for(j=start;j<pos;j++)
+ fprintf(stderr,"%d ",flagArray[j]);
+ fprintf(stderr,"\n");
+ */
+ if ( retain < 1 )
+ {
+ deletion[threadID]++;
+ }
+
+ if ( retain < 2 )
+ {
+ flagArray[start] = 0;
+ mixBuffer[start] = kmerZero;
+ return;
+ }
+
+ if ( ( pos - start ) != retain )
+ {
+ fprintf ( stderr, "Read %d, %d vs %d.\n", t, retain, edge_index - start );
+ }
+
+ if ( pos < finish )
+ {
+ flagArray[pos] = 0;
+ mixBuffer[pos] = kmerZero;
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
/*************************************************
@@ -783,560 +783,580 @@ Output:
Return:
None.
*************************************************/
-void prlRead2edge ( char * libfile, char * outfile )
+void prlRead2edge ( char *libfile, char *outfile )
{
- char * cach1;
- char * cach2;
- unsigned char asm_ctg = 1;
- long long i;
- char name[256], *src_name, *next_name;
- FILE * outfp = NULL;
- int maxReadNum, libNo;
- boolean flag, pairs = 0;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- //init
- maxReadLen = 0;
- maxNameLen = 256;
- scan_libInfo ( libfile );
- alloc_pe_mem ( num_libs );
-
- if ( !maxReadLen )
- {
- maxReadLen = 100;
- }
-
- maxReadLen4all = maxReadLen;
- fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n", libfile, maxReadLen, maxNameLen );
-
- if ( repsTie )
- {
- sprintf ( name, "%s.path", outfile );
- outfp = ckopen ( name, "wb" );
- }
-
- src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- mixBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
- nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- flagArray = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- //printf("buffer for at most %d reads\n",maxReadNum);
- int maxAIOSize = 32768;
- aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
- aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
- readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
- readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024
- cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
- cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
- memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
- memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- memoAlloc4preArc ();
- flags = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
- deletion = ( int * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( int ) );
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
-
- if ( repsTie )
- {
- markerOnEdge = ( unsigned char * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned char ) );
-
- for ( i = 1; i <= num_ed; i++ )
- {
- markerOnEdge[i] = 0;
- }
-
- fwriteBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) );
- }
-
- thrdSignal[0] = 0;
-
- if ( 1 )
- {
- preArc_mem_managers = ( MEM_MANAGER ** ) ckalloc ( thrd_num * sizeof ( MEM_MANAGER * ) );
- arcCounters = ( unsigned int * ) ckalloc ( thrd_num * sizeof ( unsigned int ) );
-
- for ( i = 0; i < thrd_num; i++ )
- {
- arcCounters[i] = 0;
- preArc_mem_managers[i] = createMem_manager ( preARCBLOCKSIZE, sizeof ( preARC ) );
- deletion[i + 1] = 0;
- flags[i + 1] = ( char * ) ckalloc ( 2 * maxReadLen * sizeof ( char ) );
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- }
-
- if ( 1 )
- {
- deletion[0] = 0;
- flags[0] = ( char * ) ckalloc ( 2 * maxReadLen * sizeof ( char ) );
- rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
- int t0, t1, t2, t3, t4, t5, t6;
- t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
- time_t read_start, read_end, time_bef, time_aft;
- time ( &read_start );
-
- while ( openNextFile ( &libNo, pairs, asm_ctg ) )
- {
- if ( lib_array[libNo].curr_type == 4 )
- {
- int type = 0; //deside the PE reads is good or bad
-
- while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
- {
- if ( type == -1 ) //if the reads is bad, go back.
- {
- i--;
-
- if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
- {
- kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
- read_c--;
- }
-
- n_solexa -= 2;
- continue;
- }
-
- if ( ( ++i ) % 100000000 == 0 )
- {
- fprintf ( stderr, "--- %lldth reads.\n", i );
- }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- {
- continue;
- }
-
- //if(lenBuffer[read_c]>70)
- // lenBuffer[read_c] = 70;
- //else if(lenBuffer[read_c]>40)
- // lenBuffer[read_c] = 40;
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- time ( &time_aft );
- t3 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
- time ( &time_aft );
- t4 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
- time ( &time_aft );
- t5 += time_aft - time_bef;
- time ( &time_bef );
-
- //recordPreArc();
- if ( repsTie )
- {
- recordPathBin ( outfp );
- }
-
- time ( &time_aft );
- t6 += time_aft - time_bef;
- //output_path(read_c,edge_no,flags,outfp);
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
- }
- }
- else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
- {
- initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
- initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
- int offset1, offset2, flag1, flag2, rt1, rt2;
- offset1 = offset2 = 0;
- rt1 = aio_read ( &aio1 );
- rt2 = aio_read ( &aio2 );
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
-
- if ( flag1 && flag2 )
- {
- int start1, start2, turn;
- start1 = start2 = 0;
- turn = 1;
-
- while ( start1 < offset1 || start2 < offset2 )
- {
- if ( turn == 1 )
- {
- turn = 2;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- {
- if ( start1 >= offset1 )
- {
- start1 = 0;
- offset1 = 0;
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- }
-
- continue;
- }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( start1 >= offset1 )
- {
- start1 = 0;
- offset1 = 0;
- flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
- }
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- time ( &time_aft );
- t3 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
- time ( &time_aft );
- t4 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
- time ( &time_aft );
- t5 += time_aft - time_bef;
- time ( &time_bef );
-
- //recordPreArc();
- if ( repsTie )
- { recordPathBin ( outfp ); }
-
- time ( &time_aft );
- t6 += time_aft - time_bef;
- //output_path(read_c,edge_no,flags,outfp);
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
-
- continue;
- }
-
- if ( turn == 2 )
- {
- turn = 1;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- {
- if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
- { break; }
-
- if ( start2 >= offset2 )
- {
- start2 = 0;
- offset2 = 0;
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
- }
-
- continue;
- }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
- { break; }
-
- if ( start2 >= offset2 )
- {
- start2 = 0;
- offset2 = 0;
- flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
- }
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- time ( &time_aft );
- t3 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
- time ( &time_aft );
- t4 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
- time ( &time_aft );
- t5 += time_aft - time_bef;
- time ( &time_bef );
-
- //recordPreArc();
- if ( repsTie )
- { recordPathBin ( outfp ); }
-
- time ( &time_aft );
- t6 += time_aft - time_bef;
- //output_path(read_c,edge_no,flags,outfp);
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
-
- continue;
- }
- }
- }
- else
- {
- fprintf(stderr, "Error: aio_read error.\n");
- }
- }
- else
- {
- initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
- int offset, flag1, rt;
- offset = 0;
- rt = aio_read ( &aio1 );
-
- while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
- {
- int start = 0;
-
- while ( start < offset )
- {
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
-
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- { continue; }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
- }
-
- if ( read_c > maxReadNum - 1024 )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- time ( &time_aft );
- t3 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
- time ( &time_aft );
- t4 += time_aft - time_bef;
- time ( &time_bef );
- sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
- time ( &time_aft );
- t5 += time_aft - time_bef;
- time ( &time_bef );
-
- //recordPreArc();
- if ( repsTie )
- { recordPathBin ( outfp ); }
-
- time ( &time_aft );
- t6 += time_aft - time_bef;
- //output_path(read_c,edge_no,flags,outfp);
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
-
- if ( flag1 == 2 )
- { break; }
- }
- }
- }
-
- fprintf ( stderr, "%lld read(s) processed.\n", i );
- // fprintf (stderr,"Time ReadingReads: %d,chopKmer4read: %d,searchKmer: %d,parse1read: %d,search1kmerPlus: %d,thread_add1preArc: %d,recordPathBin: %d\n", t0, t1, t2, t3, t4, t5, t6);
- fprintf ( stderr, "Time spent on:\n" );
- fprintf ( stderr, " importing reads: %ds,\n", t0 );
- fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
- fprintf ( stderr, " searching kmers: %ds,\n", t2 );
- fprintf ( stderr, " aligning reads to edges: %ds,\n", t3 );
- fprintf ( stderr, " searching (K+1)mers: %ds,\n", t4 );
- fprintf ( stderr, " adding pre-arcs: %ds,\n", t5 );
- fprintf ( stderr, " recording read paths: %ds.\n", t6 );
-
- if ( read_c )
- {
- indexArray[read_c] = kmer_c;
- sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
- sendWorkSignal ( 1, thrdSignal ); //searchKmer
- sendWorkSignal ( 3, thrdSignal ); //parse1read
- sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
- sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
-
- //recordPreArc();
- if ( repsTie )
- {
- recordPathBin ( outfp );
- }
- }
-
- fprintf ( stderr, "%lld marker(s) output.\n", markCounter );
- sendWorkSignal ( 5, thrdSignal ); //over
- thread_wait ( threads );
- output_arcs ( outfile );
- memoFree4preArc ();
-
- if ( 1 ) // multi-threads
- {
- arcCounter = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- arcCounter += arcCounters[i];
- free ( ( void * ) flags[i + 1] );
- deletion[0] += deletion[i + 1];
- free ( ( void * ) rcSeq[i + 1] );
- }
- }
-
- if ( 1 )
- {
- free ( ( void * ) flags[0] );
- free ( ( void * ) rcSeq[0] );
- }
-
- fprintf ( stderr, "Reads alignment done, %d read(s) deleted, %lld pre-arc(s) added.\n", deletion[0], arcCounter );
-
- if ( repsTie )
- {
- free ( ( void * ) markerOnEdge );
- free ( ( void * ) fwriteBuf );
- }
-
- free ( ( void * ) arcCounters );
- free ( ( void * ) rcSeq );
-
- for ( i = 0; i < maxReadNum; i++ )
- {
- free ( ( void * ) seqBuffer[i] );
- }
-
- free ( ( void * ) seqBuffer );
- free ( ( void * ) lenBuffer );
- free ( ( void * ) indexArray );
- free ( ( void * ) flags );
- free ( ( void * ) deletion );
- free ( ( void * ) kmerBuffer );
- free ( ( void * ) mixBuffer );
- free ( ( void * ) smallerBuffer );
- free ( ( void * ) flagArray );
- free ( ( void * ) hashBanBuffer );
- free ( ( void * ) nodeBuffer );
- free ( ( void * ) src_name );
- free ( ( void * ) next_name );
- free ( ( void * ) aioBuffer1 );
- free ( ( void * ) aioBuffer2 );
- free ( ( void * ) readBuffer1 );
- free ( ( void * ) readBuffer2 );
- free ( ( void * ) cach1 );
- free ( ( void * ) cach2 );
-
- if ( gLineLen < maxReadLen )
- {
- free ( ( void * ) gStr );
- gStr = NULL;
- }
-
- if ( repsTie )
- {
- fclose ( outfp );
- }
-
- free_pe_mem ();
- free_libs ();
+ char *cach1;
+ char *cach2;
+ unsigned char asm_ctg = 1;
+ long long i;
+ char name[256], *src_name, *next_name;
+ FILE *outfp = NULL;
+ int maxReadNum, libNo;
+ boolean flag, pairs = 0;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ //init
+ maxReadLen = 0;
+ maxNameLen = 256;
+ scan_libInfo ( libfile );
+ alloc_pe_mem ( num_libs );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ maxReadLen4all = maxReadLen;
+ fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n", libfile, maxReadLen, maxNameLen );
+
+ if ( repsTie )
+ {
+ sprintf ( name, "%s.path", outfile );
+ outfp = ckopen ( name, "wb" );
+ }
+
+ src_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ mixBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ hashBanBuffer = ( ubyte8 * ) ckalloc ( buffer_size * sizeof ( ubyte8 ) );
+ nodeBuffer = ( kmer_t ** ) ckalloc ( buffer_size * sizeof ( kmer_t * ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ flagArray = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
+ //printf("buffer for at most %d reads\n",maxReadNum);
+ int maxAIOSize = 32768;
+ aioBuffer1 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
+ aioBuffer2 = ( char * ) ckalloc ( ( maxAIOSize ) * sizeof ( char ) );
+ readBuffer1 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //(char *)ckalloc(maxAIOSize*sizeof(char)); //1024
+ readBuffer2 = ( char * ) ckalloc ( ( maxAIOSize + ( maxReadLen * 4 + 1024 ) ) * sizeof ( char ) ); //1024
+ cach1 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
+ cach2 = ( char * ) ckalloc ( ( maxReadLen * 4 + 1024 ) * sizeof ( char ) ); //1024
+ memset ( cach1, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
+ memset ( cach2, '\0', ( maxReadLen * 4 + 1024 ) ); //1024
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ memoAlloc4preArc ();
+ flags = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+ deletion = ( int * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( int ) );
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+
+ if ( repsTie )
+ {
+ markerOnEdge = ( unsigned char * ) ckalloc ( ( num_ed + 1 ) * sizeof ( unsigned char ) );
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ markerOnEdge[i] = 0;
+ }
+
+ fwriteBuf = ( unsigned int * ) ckalloc ( ( maxReadLen - overlaplen + 1 ) * sizeof ( unsigned int ) );
+ }
+
+ thrdSignal[0] = 0;
+
+ if ( 1 )
+ {
+ preArc_mem_managers = ( MEM_MANAGER ** ) ckalloc ( thrd_num * sizeof ( MEM_MANAGER * ) );
+ arcCounters = ( unsigned int * ) ckalloc ( thrd_num * sizeof ( unsigned int ) );
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ arcCounters[i] = 0;
+ preArc_mem_managers[i] = createMem_manager ( preARCBLOCKSIZE, sizeof ( preARC ) );
+ deletion[i + 1] = 0;
+ flags[i + 1] = ( char * ) ckalloc ( 2 * maxReadLen * sizeof ( char ) );
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ if ( 1 )
+ {
+ deletion[0] = 0;
+ flags[0] = ( char * ) ckalloc ( 2 * maxReadLen * sizeof ( char ) );
+ rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ kmer_c = n_solexa = read_c = i = libNo = readNumBack = gradsCounter = 0;
+ int t0, t1, t2, t3, t4, t5, t6;
+ t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
+ time_t read_start, read_end, time_bef, time_aft;
+ time ( &read_start );
+
+ while ( openNextFile ( &libNo, pairs, asm_ctg ) )
+ {
+ if ( lib_array[libNo].curr_type == 4 )
+ {
+ int type = 0; //deside the PE reads is good or bad
+
+ while ( ( flag = read1seqInLibBam ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), &libNo, pairs, 1, &type ) ) != 0 )
+ {
+ if ( type == -1 ) //if the reads is bad, go back.
+ {
+ i--;
+
+ if ( lenBuffer[read_c - 1] >= overlaplen + 1 )
+ {
+ kmer_c -= lenBuffer[read_c - 1] - overlaplen + 1;
+ read_c--;
+ }
+
+ n_solexa -= 2;
+ continue;
+ }
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ continue;
+ }
+
+ //if(lenBuffer[read_c]>70)
+ // lenBuffer[read_c] = 70;
+ //else if(lenBuffer[read_c]>40)
+ // lenBuffer[read_c] = 40;
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
+ time ( &time_aft );
+ t4 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
+ time ( &time_aft );
+ t5 += time_aft - time_bef;
+ time ( &time_bef );
+
+ //recordPreArc();
+ if ( repsTie )
+ {
+ recordPathBin ( outfp );
+ }
+
+ time ( &time_aft );
+ t6 += time_aft - time_bef;
+ //output_path(read_c,edge_no,flags,outfp);
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+ }
+ }
+ else if ( lib_array[libNo].curr_type == 1 || lib_array[libNo].curr_type == 2 )
+ {
+ initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
+ initAIO ( &aio2, aioBuffer2, fileno ( lib_array[libNo].fp2 ), maxAIOSize );
+ int offset1, offset2, flag1, flag2, rt1, rt2;
+ offset1 = offset2 = 0;
+ rt1 = aio_read ( &aio1 );
+ rt2 = aio_read ( &aio2 );
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+
+ if ( flag1 && flag2 )
+ {
+ int start1, start2, turn;
+ start1 = start2 = 0;
+ turn = 1;
+
+ while ( start1 < offset1 || start2 < offset2 )
+ {
+ if ( turn == 1 )
+ {
+ turn = 2;
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ if ( start1 >= offset1 )
+ {
+ start1 = 0;
+ offset1 = 0;
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ }
+
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( start1 >= offset1 )
+ {
+ start1 = 0;
+ offset1 = 0;
+ flag1 = AIORead ( &aio1, &offset1, readBuffer1, cach1, &rt1, lib_array[libNo].curr_type );
+ }
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
+ time ( &time_aft );
+ t4 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
+ time ( &time_aft );
+ t5 += time_aft - time_bef;
+ time ( &time_bef );
+
+ //recordPreArc();
+ if ( repsTie )
+ {
+ recordPathBin ( outfp );
+ }
+
+ time ( &time_aft );
+ t6 += time_aft - time_bef;
+ //output_path(read_c,edge_no,flags,outfp);
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+
+ continue;
+ }
+
+ if ( turn == 2 )
+ {
+ turn = 1;
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
+ {
+ break;
+ }
+
+ if ( start2 >= offset2 )
+ {
+ start2 = 0;
+ offset2 = 0;
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+ }
+
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( ( flag2 == 2 ) && ( start2 >= offset2 ) )
+ {
+ break;
+ }
+
+ if ( start2 >= offset2 )
+ {
+ start2 = 0;
+ offset2 = 0;
+ flag2 = AIORead ( &aio2, &offset2, readBuffer2, cach2, &rt2, lib_array[libNo].curr_type );
+ }
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
+ time ( &time_aft );
+ t4 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
+ time ( &time_aft );
+ t5 += time_aft - time_bef;
+ time ( &time_bef );
+
+ //recordPreArc();
+ if ( repsTie )
+ {
+ recordPathBin ( outfp );
+ }
+
+ time ( &time_aft );
+ t6 += time_aft - time_bef;
+ //output_path(read_c,edge_no,flags,outfp);
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+
+ continue;
+ }
+ }
+ }
+ else
+ {
+ fprintf(stderr, "Error: aio_read error.\n");
+ }
+ }
+ else
+ {
+ initAIO ( &aio1, aioBuffer1, fileno ( lib_array[libNo].fp1 ), maxAIOSize );
+ int offset, flag1, rt;
+ offset = 0;
+ rt = aio_read ( &aio1 );
+
+ while ( ( flag1 = AIORead ( &aio1, &offset, readBuffer1, cach1, &rt, lib_array[libNo].curr_type ) ) )
+ {
+ int start = 0;
+
+ while ( start < offset )
+ {
+ readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+ }
+
+ if ( read_c > maxReadNum - 1024 )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
+ time ( &time_aft );
+ t4 += time_aft - time_bef;
+ time ( &time_bef );
+ sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
+ time ( &time_aft );
+ t5 += time_aft - time_bef;
+ time ( &time_bef );
+
+ //recordPreArc();
+ if ( repsTie )
+ {
+ recordPathBin ( outfp );
+ }
+
+ time ( &time_aft );
+ t6 += time_aft - time_bef;
+ //output_path(read_c,edge_no,flags,outfp);
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+
+ if ( flag1 == 2 )
+ {
+ break;
+ }
+ }
+ }
+ }
+
+ fprintf ( stderr, "%lld read(s) processed.\n", i );
+ // fprintf (stderr,"Time ReadingReads: %d,chopKmer4read: %d,searchKmer: %d,parse1read: %d,search1kmerPlus: %d,thread_add1preArc: %d,recordPathBin: %d\n", t0, t1, t2, t3, t4, t5, t6);
+ fprintf ( stderr, "Time spent on:\n" );
+ fprintf ( stderr, " importing reads: %ds,\n", t0 );
+ fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
+ fprintf ( stderr, " searching kmers: %ds,\n", t2 );
+ fprintf ( stderr, " aligning reads to edges: %ds,\n", t3 );
+ fprintf ( stderr, " searching (K+1)mers: %ds,\n", t4 );
+ fprintf ( stderr, " adding pre-arcs: %ds,\n", t5 );
+ fprintf ( stderr, " recording read paths: %ds.\n", t6 );
+
+ if ( read_c )
+ {
+ indexArray[read_c] = kmer_c;
+ sendWorkSignal ( 2, thrdSignal ); //chopKmer4read
+ sendWorkSignal ( 1, thrdSignal ); //searchKmer
+ sendWorkSignal ( 3, thrdSignal ); //parse1read
+ sendWorkSignal ( 4, thrdSignal ); //search1kmerPlus
+ sendWorkSignal ( 6, thrdSignal ); //thread_add1preArc
+
+ //recordPreArc();
+ if ( repsTie )
+ {
+ recordPathBin ( outfp );
+ }
+ }
+
+ fprintf ( stderr, "%lld marker(s) output.\n", markCounter );
+ sendWorkSignal ( 5, thrdSignal ); //over
+ thread_wait ( threads );
+ output_arcs ( outfile );
+ memoFree4preArc ();
+
+ if ( 1 ) // multi-threads
+ {
+ arcCounter = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ arcCounter += arcCounters[i];
+ free ( ( void * ) flags[i + 1] );
+ deletion[0] += deletion[i + 1];
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+ }
+
+ if ( 1 )
+ {
+ free ( ( void * ) flags[0] );
+ free ( ( void * ) rcSeq[0] );
+ }
+
+ fprintf ( stderr, "Reads alignment done, %d read(s) deleted, %lld pre-arc(s) added.\n", deletion[0], arcCounter );
+
+ if ( repsTie )
+ {
+ free ( ( void * ) markerOnEdge );
+ free ( ( void * ) fwriteBuf );
+ }
+
+ free ( ( void * ) arcCounters );
+ free ( ( void * ) rcSeq );
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ free ( ( void * ) lenBuffer );
+ free ( ( void * ) indexArray );
+ free ( ( void * ) flags );
+ free ( ( void * ) deletion );
+ free ( ( void * ) kmerBuffer );
+ free ( ( void * ) mixBuffer );
+ free ( ( void * ) smallerBuffer );
+ free ( ( void * ) flagArray );
+ free ( ( void * ) hashBanBuffer );
+ free ( ( void * ) nodeBuffer );
+ free ( ( void * ) src_name );
+ free ( ( void * ) next_name );
+ free ( ( void * ) aioBuffer1 );
+ free ( ( void * ) aioBuffer2 );
+ free ( ( void * ) readBuffer1 );
+ free ( ( void * ) readBuffer2 );
+ free ( ( void * ) cach1 );
+ free ( ( void * ) cach2 );
+
+ if ( gLineLen < maxReadLen )
+ {
+ free ( ( void * ) gStr );
+ gStr = NULL;
+ }
+
+ if ( repsTie )
+ {
+ fclose ( outfp );
+ }
+
+ free_pe_mem ();
+ free_libs ();
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
diff --git a/standardPregraph/prlReadFillGap.c b/standardPregraph/prlReadFillGap.c
index b1de77a..b8b08c4 100644
--- a/standardPregraph/prlReadFillGap.c
+++ b/standardPregraph/prlReadFillGap.c
@@ -1,7 +1,7 @@
/*
* prlReadFillGap.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -35,15 +35,15 @@ static int Ncounter;
static int allGaps;
// for multi threads
-static int * counters;
+static int *counters;
static pthread_mutex_t mutex;
static int scafBufSize = 100;
-static boolean * flagBuf;
-static unsigned char * thrdNoBuf;
-static STACK ** ctgStackBuffer;
+static boolean *flagBuf;
+static unsigned char *thrdNoBuf;
+static STACK **ctgStackBuffer;
static int scafCounter;
static int scafInBuf;
-static char * scaffBuffer;
+static char *scaffBuffer;
static void MarkCtgOccu ( unsigned int ctg );
@@ -59,1879 +59,1927 @@ static void printRead(int len,char *seq)
*/
static void attach1read2contig ( unsigned int ctgID, int len, int pos, long long starter )
{
- unsigned int ctg = index_array[ctgID]; //new index in contig array
-
- if ( isLargerThanTwin ( ctg ) )
- {
- ctg = getTwinCtg ( ctg ); // put all reads in one contig of a twin
- pos = contig_array[ctg].length + overlaplen - pos - len;
- }
-
- if ( !contig_array[ctg].closeReads )
- {
- contig_array[ctg].closeReads = ( STACK * ) createStack ( RDBLOCKSIZE, sizeof ( READNEARBY ) );
- }
-
- READNEARBY * rd = ( READNEARBY * ) stackPush ( contig_array[ctg].closeReads );
- rd->len = len;
- rd->dis = pos;
- rd->seqStarter = starter;
+ unsigned int ctg = index_array[ctgID]; //new index in contig array
+
+ if ( isLargerThanTwin ( ctg ) )
+ {
+ ctg = getTwinCtg ( ctg ); // put all reads in one contig of a twin
+ pos = contig_array[ctg].length + overlaplen - pos - len;
+ }
+
+ if ( !contig_array[ctg].closeReads )
+ {
+ contig_array[ctg].closeReads = ( STACK * ) createStack ( RDBLOCKSIZE, sizeof ( READNEARBY ) );
+ }
+
+ READNEARBY *rd = ( READNEARBY * ) stackPush ( contig_array[ctg].closeReads );
+ rd->len = len;
+ rd->dis = pos;
+ rd->seqStarter = starter;
}
static void convertIndex ()
{
- int * length_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
- unsigned int i;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- length_array[i] = 0;
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( index_array[i] > 0 )
- {
- length_array[index_array[i]] = i;
- }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- index_array[i] = length_array[i];
- } //contig i with new index: index_array[i]
-
- free ( ( void * ) length_array );
+ int *length_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
+ unsigned int i;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ length_array[i] = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( index_array[i] > 0 )
+ {
+ length_array[index_array[i]] = i;
+ }
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ index_array[i] = length_array[i];
+ } //contig i with new index: index_array[i]
+
+ free ( ( void * ) length_array );
}
-static long long getRead1by1_gz ( gzFile * fp, DARRAY * readSeqInGap )
+static long long getRead1by1_gz ( gzFile *fp, DARRAY *readSeqInGap )
{
- long long readCounter = 0;
-
- if ( !fp )
- {
- return readCounter;
- }
-
- int len, ctgID, pos;
- long long starter;
- char * pt;
- char * freadBuf = ( char * ) ckalloc ( ( maxReadLen / 4 + 1 ) * sizeof ( char ) );
-
- while ( gzread ( fp, &len, sizeof ( int ) ) == 4 )
- {
- if ( gzread ( fp, &ctgID, sizeof ( int ) ) != 4 )
- {
- break;
- }
-
- if ( gzread ( fp, &pos, sizeof ( int ) ) != 4 )
- {
- break;
- }
-
- if ( gzread ( fp, freadBuf, sizeof ( char ) * ( unsigned ) ( len / 4 + 1 ) ) != ( unsigned ) ( len / 4 + 1 ) )
- {
- break;
- }
-
- //put seq to dynamic array
- starter = readSeqInGap->item_c;
-
- if ( !darrayPut ( readSeqInGap, starter + len / 4 ) ) // make sure there's room for this seq
- {
- break;
- }
-
- pt = ( char * ) darrayPut ( readSeqInGap, starter );
- bcopy ( freadBuf, pt, len / 4 + 1 );
- attach1read2contig ( ctgID, len, pos, starter );
- readCounter++;
- }
-
- free ( ( void * ) freadBuf );
- return readCounter;
+ long long readCounter = 0;
+
+ if ( !fp )
+ {
+ return readCounter;
+ }
+
+ int len, ctgID, pos;
+ long long starter;
+ char *pt;
+ char *freadBuf = ( char * ) ckalloc ( ( maxReadLen / 4 + 1 ) * sizeof ( char ) );
+
+ while ( gzread ( fp, &len, sizeof ( int ) ) == 4 )
+ {
+ if ( gzread ( fp, &ctgID, sizeof ( int ) ) != 4 )
+ {
+ break;
+ }
+
+ if ( gzread ( fp, &pos, sizeof ( int ) ) != 4 )
+ {
+ break;
+ }
+
+ if ( gzread ( fp, freadBuf, sizeof ( char ) * ( unsigned ) ( len / 4 + 1 ) ) != ( unsigned ) ( len / 4 + 1 ) )
+ {
+ break;
+ }
+
+ //put seq to dynamic array
+ starter = readSeqInGap->item_c;
+
+ if ( !darrayPut ( readSeqInGap, starter + len / 4 ) ) // make sure there's room for this seq
+ {
+ break;
+ }
+
+ pt = ( char * ) darrayPut ( readSeqInGap, starter );
+ bcopy ( freadBuf, pt, len / 4 + 1 );
+ attach1read2contig ( ctgID, len, pos, starter );
+ readCounter++;
+ }
+
+ free ( ( void * ) freadBuf );
+ return readCounter;
}
-static long long getRead1by1 ( FILE * fp, DARRAY * readSeqInGap )
+static long long getRead1by1 ( FILE *fp, DARRAY *readSeqInGap )
{
- long long readCounter = 0;
-
- if ( !fp )
- {
- return readCounter;
- }
-
- int len, ctgID, pos;
- long long starter;
- char * pt;
- char * freadBuf = ( char * ) ckalloc ( ( maxReadLen / 4 + 1 ) * sizeof ( char ) );
-
- while ( fread ( &len, sizeof ( int ), 1, fp ) == 1 )
- {
- if ( fread ( &ctgID, sizeof ( int ), 1, fp ) != 1 )
- {
- break;
- }
-
- if ( fread ( &pos, sizeof ( int ), 1, fp ) != 1 )
- {
- break;
- }
-
- if ( fread ( freadBuf, sizeof ( char ), len / 4 + 1, fp ) != ( unsigned ) ( len / 4 + 1 ) )
- {
- break;
- }
-
- //put seq to dynamic array
- starter = readSeqInGap->item_c;
-
- if ( !darrayPut ( readSeqInGap, starter + len / 4 ) ) // make sure there's room for this seq
- {
- break;
- }
-
- pt = ( char * ) darrayPut ( readSeqInGap, starter );
- bcopy ( freadBuf, pt, len / 4 + 1 );
- attach1read2contig ( ctgID, len, pos, starter );
- readCounter++;
- }
-
- free ( ( void * ) freadBuf );
- return readCounter;
+ long long readCounter = 0;
+
+ if ( !fp )
+ {
+ return readCounter;
+ }
+
+ int len, ctgID, pos;
+ long long starter;
+ char *pt;
+ char *freadBuf = ( char * ) ckalloc ( ( maxReadLen / 4 + 1 ) * sizeof ( char ) );
+
+ while ( fread ( &len, sizeof ( int ), 1, fp ) == 1 )
+ {
+ if ( fread ( &ctgID, sizeof ( int ), 1, fp ) != 1 )
+ {
+ break;
+ }
+
+ if ( fread ( &pos, sizeof ( int ), 1, fp ) != 1 )
+ {
+ break;
+ }
+
+ if ( fread ( freadBuf, sizeof ( char ), len / 4 + 1, fp ) != ( unsigned ) ( len / 4 + 1 ) )
+ {
+ break;
+ }
+
+ //put seq to dynamic array
+ starter = readSeqInGap->item_c;
+
+ if ( !darrayPut ( readSeqInGap, starter + len / 4 ) ) // make sure there's room for this seq
+ {
+ break;
+ }
+
+ pt = ( char * ) darrayPut ( readSeqInGap, starter );
+ bcopy ( freadBuf, pt, len / 4 + 1 );
+ attach1read2contig ( ctgID, len, pos, starter );
+ readCounter++;
+ }
+
+ free ( ( void * ) freadBuf );
+ return readCounter;
}
// Darray *readSeqInGap
-static boolean loadReads4gap ( char * graphfile )
+static boolean loadReads4gap ( char *graphfile )
{
- FILE * fp1, *fp2;
- gzFile * fp;
- char name[1024];
- long long readCounter;
-
- if ( COMPATIBLE_MODE == 1 )
- {
- sprintf ( name, "%s.readInGap", graphfile );
- fp1 = fopen ( name, "rb" );
- }
- else
- {
- sprintf ( name, "%s.readInGap.gz", graphfile );
- fp = gzopen ( name, "rb" );
- }
-
- sprintf ( name, "%s.longReadInGap", graphfile );
- fp2 = fopen ( name, "rb" );
-
- if ( COMPATIBLE_MODE == 1 && !fp1 && !fp2 )
- {
- fprintf ( stderr, "Can't open %s.readInGap and %s.longReadInGap!\n", graphfile, graphfile );
- return 0;
- }
- else if ( COMPATIBLE_MODE == 0 && !fp && !fp2 )
- {
- fprintf ( stderr, "Can't open %s.readInGap.gz and %s.longReadInGap!\n", graphfile, graphfile );
- return 0;
- }
-
- if ( !orig2new )
- {
- convertIndex ();
- orig2new = 1;
- }
-
- readSeqInGap = ( DARRAY * ) createDarray ( 1000000, sizeof ( char ) );
-
- if ( COMPATIBLE_MODE == 1 && fp1 )
- {
- readCounter = getRead1by1 ( fp1, readSeqInGap );
- fprintf ( stderr, "Loaded %lld reads from %s.readInGap.\n", readCounter, graphfile );
- fclose ( fp1 );
- }
- else if ( COMPATIBLE_MODE == 0 && fp )
- {
- readCounter = getRead1by1_gz ( fp, readSeqInGap );
- fprintf ( stderr, "Loaded %lld reads from %s.readInGap.\n", readCounter, graphfile );
- gzclose ( fp );
- }
-
- if ( fp2 )
- {
- readCounter = getRead1by1 ( fp2, readSeqInGap );
- fprintf ( stderr, "Loaded %lld reads from %s.LongReadInGap.\n", readCounter, graphfile );
- fclose ( fp2 );
- }
-
- return 1;
+ FILE *fp1, *fp2;
+ gzFile *fp;
+ char name[1024];
+ long long readCounter;
+
+ if ( COMPATIBLE_MODE == 1 )
+ {
+ sprintf ( name, "%s.readInGap", graphfile );
+ fp1 = fopen ( name, "rb" );
+ }
+ else
+ {
+ sprintf ( name, "%s.readInGap.gz", graphfile );
+ fp = gzopen ( name, "rb" );
+ }
+
+ sprintf ( name, "%s.longReadInGap", graphfile );
+ fp2 = fopen ( name, "rb" );
+
+ if ( COMPATIBLE_MODE == 1 && !fp1 && !fp2 )
+ {
+ fprintf ( stderr, "Can't open %s.readInGap and %s.longReadInGap!\n", graphfile, graphfile );
+ return 0;
+ }
+ else if ( COMPATIBLE_MODE == 0 && !fp && !fp2 )
+ {
+ fprintf ( stderr, "Can't open %s.readInGap.gz and %s.longReadInGap!\n", graphfile, graphfile );
+ return 0;
+ }
+
+ if ( !orig2new )
+ {
+ convertIndex ();
+ orig2new = 1;
+ }
+
+ readSeqInGap = ( DARRAY * ) createDarray ( 1000000, sizeof ( char ) );
+
+ if ( COMPATIBLE_MODE == 1 && fp1 )
+ {
+ readCounter = getRead1by1 ( fp1, readSeqInGap );
+ fprintf ( stderr, "Loaded %lld reads from %s.readInGap.\n", readCounter, graphfile );
+ fclose ( fp1 );
+ }
+ else if ( COMPATIBLE_MODE == 0 && fp )
+ {
+ readCounter = getRead1by1_gz ( fp, readSeqInGap );
+ fprintf ( stderr, "Loaded %lld reads from %s.readInGap.\n", readCounter, graphfile );
+ gzclose ( fp );
+ }
+
+ if ( fp2 )
+ {
+ readCounter = getRead1by1 ( fp2, readSeqInGap );
+ fprintf ( stderr, "Loaded %lld reads from %s.LongReadInGap.\n", readCounter, graphfile );
+ fclose ( fp2 );
+ }
+
+ return 1;
}
static void debugging1 ()
{
- unsigned int i;
-
- if ( orig2new )
- {
- unsigned int * length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
-
- //use length_array to change info in index_array
- for ( i = 1; i <= num_ctg; i++ )
- {
- length_array[i] = 0;
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( index_array[i] > 0 )
- {
- length_array[index_array[i]] = i;
- }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- index_array[i] = length_array[i];
- } //contig i with original index: index_array[i]
-
- orig2new = 0;
- }
-
- READNEARBY * rd;
- int j;
- char * pt;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( !contig_array[i].closeReads )
- {
- continue;
- }
-
- if ( index_array[i] != 735 )
- {
- continue;
- }
-
- fprintf ( stderr, "Contig %d, len %d: \n", index_array[i], contig_array[i].length );
- stackBackup ( contig_array[i].closeReads );
-
- while ( ( rd = ( READNEARBY * ) stackPop ( contig_array[i].closeReads ) ) != NULL )
- {
- fprintf ( stderr, "%d\t%d\t%lld\t", rd->dis, rd->len, rd->seqStarter );
- pt = ( char * ) darrayGet ( readSeqInGap, rd->seqStarter );
-
- for ( j = 0; j < rd->len; j++ )
- {
- fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( pt, j ) ) );
- }
-
- fprintf ( stderr, "\n" );
- }
-
- stackRecover ( contig_array[i].closeReads );
- }
+ unsigned int i;
+
+ if ( orig2new )
+ {
+ unsigned int *length_array = ( unsigned int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( unsigned int ) );
+
+ //use length_array to change info in index_array
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ length_array[i] = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( index_array[i] > 0 )
+ {
+ length_array[index_array[i]] = i;
+ }
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ index_array[i] = length_array[i];
+ } //contig i with original index: index_array[i]
+
+ orig2new = 0;
+ }
+
+ READNEARBY *rd;
+ int j;
+ char *pt;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( !contig_array[i].closeReads )
+ {
+ continue;
+ }
+
+ if ( index_array[i] != 735 )
+ {
+ continue;
+ }
+
+ fprintf ( stderr, "Contig %d, len %d: \n", index_array[i], contig_array[i].length );
+ stackBackup ( contig_array[i].closeReads );
+
+ while ( ( rd = ( READNEARBY * ) stackPop ( contig_array[i].closeReads ) ) != NULL )
+ {
+ fprintf ( stderr, "%d\t%d\t%lld\t", rd->dis, rd->len, rd->seqStarter );
+ pt = ( char * ) darrayGet ( readSeqInGap, rd->seqStarter );
+
+ for ( j = 0; j < rd->len; j++ )
+ {
+ fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( pt, j ) ) );
+ }
+
+ fprintf ( stderr, "\n" );
+ }
+
+ stackRecover ( contig_array[i].closeReads );
+ }
}
-static void initiateCtgInScaf ( CTGinSCAF * actg )
+static void initiateCtgInScaf ( CTGinSCAF *actg )
{
- actg->cutTail = 0;
- actg->cutHead = overlaplen;
- actg->gapSeqLen = 0;
+ actg->cutTail = 0;
+ actg->cutHead = overlaplen;
+ actg->gapSeqLen = 0;
}
-static int procGap ( char * line, STACK * ctgsStack )
+static int procGap ( char *line, STACK *ctgsStack )
{
- char * tp;
- int length, i, seg;
- unsigned int ctg;
- CTGinSCAF * ctgPt;
- tp = strtok ( line, " " );
- tp = strtok ( NULL, " " ); //length
- length = atoi ( tp );
- tp = strtok ( NULL, " " ); //seg
- seg = atoi ( tp );
-
- if ( !seg )
- {
- return length;
- }
-
- for ( i = 0; i < seg; i++ )
- {
- tp = strtok ( NULL, " " );
- ctg = atoi ( tp );
- MarkCtgOccu ( ctg );
- ctgPt = ( CTGinSCAF * ) stackPush ( ctgsStack );
- initiateCtgInScaf ( ctgPt );
- ctgPt->ctgID = ctg;
- ctgPt->start = 0;
- ctgPt->end = 0;
- ctgPt->scaftig_start = 0;
- ctgPt->mask = 1;
- }
-
- return length;
+ char *tp;
+ int length, i, seg;
+ unsigned int ctg;
+ CTGinSCAF *ctgPt;
+ tp = strtok ( line, " " );
+ tp = strtok ( NULL, " " ); //length
+ length = atoi ( tp );
+ tp = strtok ( NULL, " " ); //seg
+ seg = atoi ( tp );
+
+ if ( !seg )
+ {
+ return length;
+ }
+
+ for ( i = 0; i < seg; i++ )
+ {
+ tp = strtok ( NULL, " " );
+ ctg = atoi ( tp );
+ MarkCtgOccu ( ctg );
+ ctgPt = ( CTGinSCAF * ) stackPush ( ctgsStack );
+ initiateCtgInScaf ( ctgPt );
+ ctgPt->ctgID = ctg;
+ ctgPt->start = 0;
+ ctgPt->end = 0;
+ ctgPt->scaftig_start = 0;
+ ctgPt->mask = 1;
+ }
+
+ return length;
}
-static void debugging2 ( int index, STACK * ctgsStack )
+static void debugging2 ( int index, STACK *ctgsStack )
{
- CTGinSCAF * actg;
- stackBackup ( ctgsStack );
- fprintf ( stderr, ">scaffold%d\t%d 0.0\n", index, ctgsStack->item_c );
+ CTGinSCAF *actg;
+ stackBackup ( ctgsStack );
+ fprintf ( stderr, ">scaffold%d\t%d 0.0\n", index, ctgsStack->item_c );
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- fprintf ( stderr, "%d\t%d\t%d\t%d\n", actg->ctgID, actg->start, actg->end, actg->scaftig_start );
- }
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ fprintf ( stderr, "%d\t%d\t%d\t%d\n", actg->ctgID, actg->start, actg->end, actg->scaftig_start );
+ }
- stackRecover ( ctgsStack );
+ stackRecover ( ctgsStack );
}
-static int cmp_reads ( const void * a, const void * b )
+static int cmp_reads ( const void *a, const void *b )
{
- READNEARBY * A, *B;
- A = ( READNEARBY * ) a;
- B = ( READNEARBY * ) b;
-
- if ( A->dis > B->dis )
- {
- return 1;
- }
- else if ( A->dis == B->dis )
- {
- return 0;
- }
- else
- {
- return -1;
- }
+ READNEARBY *A, *B;
+ A = ( READNEARBY * ) a;
+ B = ( READNEARBY * ) b;
+
+ if ( A->dis > B->dis )
+ {
+ return 1;
+ }
+ else if ( A->dis == B->dis )
+ {
+ return 0;
+ }
+ else
+ {
+ return -1;
+ }
}
-static void cutRdArray ( READNEARBY * rdArray, int gapStart, int gapEnd, int * count, int arrayLen, READNEARBY * cutArray )
+static void cutRdArray ( READNEARBY *rdArray, int gapStart, int gapEnd, int *count, int arrayLen, READNEARBY *cutArray )
{
- int i;
- int num = 0;
-
- for ( i = 0; i < arrayLen; i++ )
- {
- if ( rdArray[i].dis > gapEnd )
- {
- break;
- }
-
- if ( ( rdArray[i].dis + rdArray[i].len ) >= gapStart )
- {
- cutArray[num].dis = rdArray[i].dis;
- cutArray[num].len = rdArray[i].len;
- cutArray[num++].seqStarter = rdArray[i].seqStarter;
- }
- }
-
- *count = num;
+ int i;
+ int num = 0;
+
+ for ( i = 0; i < arrayLen; i++ )
+ {
+ if ( rdArray[i].dis > gapEnd )
+ {
+ break;
+ }
+
+ if ( ( rdArray[i].dis + rdArray[i].len ) >= gapStart )
+ {
+ cutArray[num].dis = rdArray[i].dis;
+ cutArray[num].len = rdArray[i].len;
+ cutArray[num++].seqStarter = rdArray[i].seqStarter;
+ }
+ }
+
+ *count = num;
}
-static void outputTightStr2Visual ( FILE * foc2, int ctg, int * num, int start, int length, int outputlen, int revS )
+static void outputTightStr2Visual ( FILE *foc2, int ctg, int *num, int start, int length, int outputlen, int revS )
{
- int i, end, column = 0, n = *num;
- char * tightStr = contig_array[ctg].seq;
-
- if ( n == 1 ) { fprintf ( foc2, ">%d\n", index_array[ctg] ); }
- else { fprintf ( foc2, ">%d-%d\n", index_array[ctg], n - 1 ); }
-
- if ( !revS )
- {
- end = start + outputlen <= length ? start + outputlen : length;
-
- for ( i = start; i < end; i++ )
- {
- fprintf ( foc2, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
-
- if ( ( ++column ) % 100 == 0 )
- {
- //column = 0;
- fprintf ( foc2, "\n" );
- }
- }
-
- if ( column % 100 != 0 )
- {
- fprintf ( foc2, "\n" );
- }
- }
- else
- {
- end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
-
- for ( i = end; i <= length - 1 - start; i++ )
- {
- fprintf ( foc2, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
-
- if ( ( ++column ) % 100 == 0 )
- {
- fprintf ( foc2, "\n" );
- //column = 0;
- }
- }
-
- if ( column % 100 != 0 )
- {
- fprintf ( foc2, "\n" );
- }
- }
+ int i, end, column = 0, n = *num;
+ char *tightStr = contig_array[ctg].seq;
+
+ if ( n == 1 )
+ {
+ fprintf ( foc2, ">%d\n", index_array[ctg] );
+ }
+ else
+ {
+ fprintf ( foc2, ">%d-%d\n", index_array[ctg], n - 1 );
+ }
+
+ if ( !revS )
+ {
+ end = start + outputlen <= length ? start + outputlen : length;
+
+ for ( i = start; i < end; i++ )
+ {
+ fprintf ( foc2, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ //column = 0;
+ fprintf ( foc2, "\n" );
+ }
+ }
+
+ if ( column % 100 != 0 )
+ {
+ fprintf ( foc2, "\n" );
+ }
+ }
+ else
+ {
+ end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+
+ for ( i = end; i <= length - 1 - start; i++ )
+ {
+ fprintf ( foc2, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ fprintf ( foc2, "\n" );
+ //column = 0;
+ }
+ }
+
+ if ( column % 100 != 0 )
+ {
+ fprintf ( foc2, "\n" );
+ }
+ }
}
-void outputTightStr ( FILE * fp, char * tightStr, int start, int length, int outputlen, int revS, int * col )
+void outputTightStr ( FILE *fp, char *tightStr, int start, int length, int outputlen, int revS, int *col )
{
- int i;
- int end;
- int column = *col;
-
- if ( !revS )
- {
- end = start + outputlen <= length ? start + outputlen : length;
-
- for ( i = start; i < end; i++ )
- {
- fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
-
- if ( ( ++column ) % 100 == 0 )
- {
- //column = 0;
- fprintf ( fp, "\n" );
- }
- }
- }
- else
- {
- end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
-
- for ( i = length - 1 - start; i >= end; i-- )
- {
- fprintf ( fp, "%c", int2compbase ( ( int ) getCharInTightString ( tightStr, i ) ) );
-
- if ( ( ++column ) % 100 == 0 )
- {
- fprintf ( fp, "\n" );
- //column = 0;
- }
- }
- }
-
- *col = column;
+ int i;
+ int end;
+ int column = *col;
+
+ if ( !revS )
+ {
+ end = start + outputlen <= length ? start + outputlen : length;
+
+ for ( i = start; i < end; i++ )
+ {
+ fprintf ( fp, "%c", int2base ( ( int ) getCharInTightString ( tightStr, i ) ) );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ //column = 0;
+ fprintf ( fp, "\n" );
+ }
+ }
+ }
+ else
+ {
+ end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+
+ for ( i = length - 1 - start; i >= end; i-- )
+ {
+ fprintf ( fp, "%c", int2compbase ( ( int ) getCharInTightString ( tightStr, i ) ) );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ fprintf ( fp, "\n" );
+ //column = 0;
+ }
+ }
+ }
+
+ *col = column;
}
-static void outputTightStr2 ( char * tightStr, char * scaff, int start, int length, int outputlen, int revS, int * col )
+static void outputTightStr2 ( char *tightStr, char *scaff, int start, int length, int outputlen, int revS, int *col )
{
- int i;
- int end;
- int column = *col;
- char a;
-
- if ( !revS )
- {
- end = start + outputlen <= length ? start + outputlen : length;
-
- for ( i = start; i < end; i++ )
- {
- a = int2base ( ( int ) getCharInTightString ( tightStr, i ) );
- // fprintf (fp, "%c", int2base ((int) getCharInTightString (tightStr, i)));
- scaff[column++] = a;
- }
- }
- else
- {
- end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
-
- for ( i = length - 1 - start; i >= end; i-- )
- {
- a = int2compbase ( ( int ) getCharInTightString ( tightStr, i ) );
- // fprintf (fp, "%c", int2compbase ((int) getCharInTightString (tightStr, i)));
- scaff[column++] = a;
- }
- }
-
- *col = column;
+ int i;
+ int end;
+ int column = *col;
+ char a;
+
+ if ( !revS )
+ {
+ end = start + outputlen <= length ? start + outputlen : length;
+
+ for ( i = start; i < end; i++ )
+ {
+ a = int2base ( ( int ) getCharInTightString ( tightStr, i ) );
+ // fprintf (fp, "%c", int2base ((int) getCharInTightString (tightStr, i)));
+ scaff[column++] = a;
+ }
+ }
+ else
+ {
+ end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+
+ for ( i = length - 1 - start; i >= end; i-- )
+ {
+ a = int2compbase ( ( int ) getCharInTightString ( tightStr, i ) );
+ // fprintf (fp, "%c", int2compbase ((int) getCharInTightString (tightStr, i)));
+ scaff[column++] = a;
+ }
+ }
+
+ *col = column;
}
-static void outputTightStrLowerCase2Visual ( FILE * foc2, int gapNum, char * tightStr, int start, int length, int outputlen )
+static void outputTightStrLowerCase2Visual ( FILE *foc2, int gapNum, char *tightStr, int start, int length, int outputlen )
{
- int i, end, column = 0;
- end = start + outputlen <= length ? start + outputlen : length;
- fprintf ( foc2, ">%d-0\n", gapNum );
-
- for ( i = start; i < end; i++ )
- {
- fprintf ( foc2, "%c", "actg"[ ( int ) getCharInTightString ( tightStr, i )] );
-
- if ( ( ++column ) % 100 == 0 )
- {
- //column = 0;
- fprintf ( foc2, "\n" );
- }
- }
-
- if ( column % 100 != 0 )
- {
- fprintf ( foc2, "\n" );
- }
+ int i, end, column = 0;
+ end = start + outputlen <= length ? start + outputlen : length;
+ fprintf ( foc2, ">%d-0\n", gapNum );
+
+ for ( i = start; i < end; i++ )
+ {
+ fprintf ( foc2, "%c", "actg"[ ( int ) getCharInTightString ( tightStr, i )] );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ //column = 0;
+ fprintf ( foc2, "\n" );
+ }
+ }
+
+ if ( column % 100 != 0 )
+ {
+ fprintf ( foc2, "\n" );
+ }
}
-static void outputTightStrLowerCase ( FILE * fp, char * tightStr, int start, int length, int outputlen, int revS, int * col )
+static void outputTightStrLowerCase ( FILE *fp, char *tightStr, int start, int length, int outputlen, int revS, int *col )
{
- int i;
- int end;
- int column = *col;
-
- if ( !revS )
- {
- end = start + outputlen <= length ? start + outputlen : length;
-
- for ( i = start; i < end; i++ )
- {
- fprintf ( fp, "%c", "actg"[ ( int ) getCharInTightString ( tightStr, i )] );
-
- if ( ( ++column ) % 100 == 0 )
- {
- //column = 0;
- fprintf ( fp, "\n" );
- }
- }
- }
- else
- {
- end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
-
- for ( i = length - 1 - start; i >= end; i-- )
- {
- fprintf ( fp, "%c", "tgac"[ ( int ) getCharInTightString ( tightStr, i )] );
-
- if ( ( ++column ) % 100 == 0 )
- {
- fprintf ( fp, "\n" );
- //column = 0;
- }
- }
- }
-
- *col = column;
+ int i;
+ int end;
+ int column = *col;
+
+ if ( !revS )
+ {
+ end = start + outputlen <= length ? start + outputlen : length;
+
+ for ( i = start; i < end; i++ )
+ {
+ fprintf ( fp, "%c", "actg"[ ( int ) getCharInTightString ( tightStr, i )] );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ //column = 0;
+ fprintf ( fp, "\n" );
+ }
+ }
+ }
+ else
+ {
+ end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+
+ for ( i = length - 1 - start; i >= end; i-- )
+ {
+ fprintf ( fp, "%c", "tgac"[ ( int ) getCharInTightString ( tightStr, i )] );
+
+ if ( ( ++column ) % 100 == 0 )
+ {
+ fprintf ( fp, "\n" );
+ //column = 0;
+ }
+ }
+ }
+
+ *col = column;
}
-static void outputTightStrLowerCase2 ( char * tightStr, char * scaff, int start, int length, int outputlen, int revS, int * col )
+static void outputTightStrLowerCase2 ( char *tightStr, char *scaff, int start, int length, int outputlen, int revS, int *col )
{
- int i;
- int end;
- int column = *col;
- char a;
-
- if ( !revS )
- {
- end = start + outputlen <= length ? start + outputlen : length;
-
- for ( i = start; i < end; i++ )
- {
- a = "actg"[ ( int ) getCharInTightString ( tightStr, i )];
- // fprintf (fp, "%c", "actg"[(int) getCharInTightString (tightStr, i)]);
- scaff[column++] = a;
- }
- }
- else
- {
- end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
-
- for ( i = length - 1 - start; i >= end; i-- )
- {
- a = "tgac"[ ( int ) getCharInTightString ( tightStr, i )];
- // fprintf (fp, "%c", "tgac"[(int) getCharInTightString (tightStr, i)]);
- scaff[column++] = a;
- }
- }
-
- *col = column;
+ int i;
+ int end;
+ int column = *col;
+ char a;
+
+ if ( !revS )
+ {
+ end = start + outputlen <= length ? start + outputlen : length;
+
+ for ( i = start; i < end; i++ )
+ {
+ a = "actg"[ ( int ) getCharInTightString ( tightStr, i )];
+ // fprintf (fp, "%c", "actg"[(int) getCharInTightString (tightStr, i)]);
+ scaff[column++] = a;
+ }
+ }
+ else
+ {
+ end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+
+ for ( i = length - 1 - start; i >= end; i-- )
+ {
+ a = "tgac"[ ( int ) getCharInTightString ( tightStr, i )];
+ // fprintf (fp, "%c", "tgac"[(int) getCharInTightString (tightStr, i)]);
+ scaff[column++] = a;
+ }
+ }
+
+ *col = column;
}
-static void outputNs ( FILE * fp, int gapN, int * col )
+static void outputNs ( FILE *fp, int gapN, int *col )
{
- int i, column = *col;
+ int i, column = *col;
- for ( i = 0; i < gapN; i++ )
- {
- fprintf ( fp, "N" );
+ for ( i = 0; i < gapN; i++ )
+ {
+ fprintf ( fp, "N" );
- if ( ( ++column ) % 100 == 0 )
- {
- //column = 0;
- fprintf ( fp, "\n" );
- }
- }
+ if ( ( ++column ) % 100 == 0 )
+ {
+ //column = 0;
+ fprintf ( fp, "\n" );
+ }
+ }
- *col = column;
+ *col = column;
}
-static void outputNs2 ( char * scaff, int gapN, int * col )
+static void outputNs2 ( char *scaff, int gapN, int *col )
{
- int i, column = *col;
+ int i, column = *col;
- for ( i = 0; i < gapN; i++ )
- {
- scaff[column] = 'N';
- column++;
- }
+ for ( i = 0; i < gapN; i++ )
+ {
+ scaff[column] = 'N';
+ column++;
+ }
- *col = column;
+ *col = column;
}
static void outputGapInfo ( unsigned int ctg1, unsigned int ctg2 )
{
- unsigned int bal_ctg1 = getTwinCtg ( ctg1 );
- unsigned int bal_ctg2 = getTwinCtg ( ctg2 );
-
- if ( isLargerThanTwin ( ctg1 ) )
- {
- fprintf ( stderr, "%d\t", index_array[bal_ctg1] );
- }
- else
- {
- fprintf ( stderr, "%d\t", index_array[ctg1] );
- }
-
- if ( isLargerThanTwin ( ctg2 ) )
- {
- fprintf ( stderr, "%d\n", index_array[bal_ctg2] );
- }
- else
- {
- fprintf ( stderr, "%d\n", index_array[ctg2] );
- }
+ unsigned int bal_ctg1 = getTwinCtg ( ctg1 );
+ unsigned int bal_ctg2 = getTwinCtg ( ctg2 );
+
+ if ( isLargerThanTwin ( ctg1 ) )
+ {
+ fprintf ( stderr, "%d\t", index_array[bal_ctg1] );
+ }
+ else
+ {
+ fprintf ( stderr, "%d\t", index_array[ctg1] );
+ }
+
+ if ( isLargerThanTwin ( ctg2 ) )
+ {
+ fprintf ( stderr, "%d\n", index_array[bal_ctg2] );
+ }
+ else
+ {
+ fprintf ( stderr, "%d\n", index_array[ctg2] );
+ }
}
-static void output1gap ( FILE * fo, int scafIndex, CTGinSCAF * prevCtg, CTGinSCAF * actg, DARRAY * gapSeqArray )
+static void output1gap ( FILE *fo, int scafIndex, CTGinSCAF *prevCtg, CTGinSCAF *actg, DARRAY *gapSeqArray )
{
- unsigned int ctg1, bal_ctg1, length1;
- int start1, outputlen1;
- unsigned int ctg2, bal_ctg2, length2;
- int start2, outputlen2;
- char * pt;
- int column = 0;
- ctg1 = prevCtg->ctgID;
- bal_ctg1 = getTwinCtg ( ctg1 );
- start1 = prevCtg->cutHead;
- length1 = contig_array[ctg1].length + overlaplen;
-
- if ( length1 - prevCtg->cutTail - start1 > CTGappend )
- {
- outputlen1 = CTGappend;
- start1 = length1 - prevCtg->cutTail - outputlen1;
- }
- else
- {
- outputlen1 = length1 - prevCtg->cutTail - start1;
- }
-
- ctg2 = actg->ctgID;
- bal_ctg2 = getTwinCtg ( ctg2 );
- start2 = actg->cutHead;
- length2 = contig_array[ctg2].length + overlaplen;
-
- if ( length2 - actg->cutTail - start2 > CTGappend )
- {
- outputlen2 = CTGappend;
- }
- else
- {
- outputlen2 = length2 - actg->cutTail - start2;
- }
-
- if ( isLargerThanTwin ( ctg1 ) )
- {
- fprintf ( fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[bal_ctg1], outputlen1, prevCtg->gapSeqLen );
- }
- else
- {
- fprintf ( fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[ctg1], outputlen1, prevCtg->gapSeqLen );
- }
-
- if ( isLargerThanTwin ( ctg2 ) )
- {
- fprintf ( fo, "_C%d_L%d\t", index_array[bal_ctg2], outputlen2 );
- }
- else
- {
- fprintf ( fo, "_C%d_L%d\t", index_array[ctg2], outputlen2 );
- }
-
- fprintf ( fo, "%d\n", start2 );
-
- if ( contig_array[ctg1].seq )
- {
- outputTightStr ( fo, contig_array[ctg1].seq, start1, length1, outputlen1, 0, &column );
- }
- else if ( contig_array[bal_ctg1].seq )
- {
- outputTightStr ( fo, contig_array[bal_ctg1].seq, start1, length1, outputlen1, 1, &column );
- }
-
- pt = ( char * ) darrayPut ( gapSeqArray, prevCtg->gapSeqOffset );
- outputTightStrLowerCase ( fo, pt, 0, prevCtg->gapSeqLen, prevCtg->gapSeqLen, 0, &column );
-
- if ( contig_array[ctg2].seq )
- {
- outputTightStr ( fo, contig_array[ctg2].seq, start2, length2, outputlen2, 0, &column );
- }
- else if ( contig_array[bal_ctg2].seq )
- {
- outputTightStr ( fo, contig_array[bal_ctg2].seq, start2, length2, outputlen2, 1, &column );
- }
-
- if ( column % 100 != 0 )
- {
- fprintf ( fo, "\n" );
- }
+ unsigned int ctg1, bal_ctg1, length1;
+ int start1, outputlen1;
+ unsigned int ctg2, bal_ctg2, length2;
+ int start2, outputlen2;
+ char *pt;
+ int column = 0;
+ ctg1 = prevCtg->ctgID;
+ bal_ctg1 = getTwinCtg ( ctg1 );
+ start1 = prevCtg->cutHead;
+ length1 = contig_array[ctg1].length + overlaplen;
+
+ if ( length1 - prevCtg->cutTail - start1 > CTGappend )
+ {
+ outputlen1 = CTGappend;
+ start1 = length1 - prevCtg->cutTail - outputlen1;
+ }
+ else
+ {
+ outputlen1 = length1 - prevCtg->cutTail - start1;
+ }
+
+ ctg2 = actg->ctgID;
+ bal_ctg2 = getTwinCtg ( ctg2 );
+ start2 = actg->cutHead;
+ length2 = contig_array[ctg2].length + overlaplen;
+
+ if ( length2 - actg->cutTail - start2 > CTGappend )
+ {
+ outputlen2 = CTGappend;
+ }
+ else
+ {
+ outputlen2 = length2 - actg->cutTail - start2;
+ }
+
+ if ( isLargerThanTwin ( ctg1 ) )
+ {
+ fprintf ( fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[bal_ctg1], outputlen1, prevCtg->gapSeqLen );
+ }
+ else
+ {
+ fprintf ( fo, ">S%d_C%d_L%d_G%d", scafIndex, index_array[ctg1], outputlen1, prevCtg->gapSeqLen );
+ }
+
+ if ( isLargerThanTwin ( ctg2 ) )
+ {
+ fprintf ( fo, "_C%d_L%d\t", index_array[bal_ctg2], outputlen2 );
+ }
+ else
+ {
+ fprintf ( fo, "_C%d_L%d\t", index_array[ctg2], outputlen2 );
+ }
+
+ fprintf ( fo, "%d\n", start2 );
+
+ if ( contig_array[ctg1].seq )
+ {
+ outputTightStr ( fo, contig_array[ctg1].seq, start1, length1, outputlen1, 0, &column );
+ }
+ else if ( contig_array[bal_ctg1].seq )
+ {
+ outputTightStr ( fo, contig_array[bal_ctg1].seq, start1, length1, outputlen1, 1, &column );
+ }
+
+ pt = ( char * ) darrayPut ( gapSeqArray, prevCtg->gapSeqOffset );
+ outputTightStrLowerCase ( fo, pt, 0, prevCtg->gapSeqLen, prevCtg->gapSeqLen, 0, &column );
+
+ if ( contig_array[ctg2].seq )
+ {
+ outputTightStr ( fo, contig_array[ctg2].seq, start2, length2, outputlen2, 0, &column );
+ }
+ else if ( contig_array[bal_ctg2].seq )
+ {
+ outputTightStr ( fo, contig_array[bal_ctg2].seq, start2, length2, outputlen2, 1, &column );
+ }
+
+ if ( column % 100 != 0 )
+ {
+ fprintf ( fo, "\n" );
+ }
}
-static void outputGapSeq ( FILE * fo, int index, STACK * ctgsStack, DARRAY * gapSeqArray )
+static void outputGapSeq ( FILE *fo, int index, STACK *ctgsStack, DARRAY *gapSeqArray )
{
- CTGinSCAF * actg, *prevCtg = NULL;
- stackRecover ( ctgsStack );
- // fprintf (fo, ">scaffold%d\n", index);
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- /* if (prevCtg)
- {
- if (actg->scaftig_start)
- {
- fprintf (fo, "0\t%d\t%d\n", prevCtg->mask, actg->mask);
- }
- else
- {
- fprintf (fo, "1\t%d\t%d\n", prevCtg->mask, actg->mask);
- }
- }
- */
- if ( prevCtg && prevCtg->gapSeqLen > 0 )
- { output1gap ( fo, index, prevCtg, actg, gapSeqArray ); }
-
- prevCtg = actg;
- }
+ CTGinSCAF *actg, *prevCtg = NULL;
+ stackRecover ( ctgsStack );
+ // fprintf (fo, ">scaffold%d\n", index);
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ /* if (prevCtg)
+ {
+ if (actg->scaftig_start)
+ {
+ fprintf (fo, "0\t%d\t%d\n", prevCtg->mask, actg->mask);
+ }
+ else
+ {
+ fprintf (fo, "1\t%d\t%d\n", prevCtg->mask, actg->mask);
+ }
+ }
+ */
+ if ( prevCtg && prevCtg->gapSeqLen > 0 )
+ {
+ output1gap ( fo, index, prevCtg, actg, gapSeqArray );
+ }
+
+ prevCtg = actg;
+ }
}
-static void outputScafSeq ( FILE * fo, FILE * foc, FILE * foc2, FILE * fo3, int index, STACK * ctgsStack, DARRAY * gapSeqArray )
+static void outputScafSeq ( FILE *fo, FILE *foc, FILE *foc2, FILE *fo3, int index, STACK *ctgsStack, DARRAY *gapSeqArray )
{
- CTGinSCAF * actg, *prevCtg = NULL;
- unsigned int ctg, bal_ctg, ctg_out, length;
- int start, outputlen, gapN;
- char * pt;
- int column = 0;
- long long cvgSum = 0;
- int lenSum = 0;
- int i, t, lu_len = 0, lu_end = 0;
- unsigned int ctg_start_pos = 0;
- char strand;
- unsigned int * pos_start = ( unsigned int * ) ckalloc ( 1000000 * sizeof ( unsigned int ) );
- unsigned int * pos_end = ( unsigned int * ) ckalloc ( 1000000 * sizeof ( unsigned int ) );
- // char index_contig[num_ctg][20];
- char ** index_contig = ( char ** ) ckalloc ( 1000000 * sizeof ( char * ) );
-
- for ( i = 0; i < 1000000; i++ )
- { index_contig[i] = ( char * ) ckalloc ( 20 * sizeof ( char ) ); }
-
- char * orien_array;
- orien_array = ( char * ) ckalloc ( 1000000 * sizeof ( char ) );
- // scaffBuffer = (char *) ckalloc (300000000 * sizeof (char));
- stackRecover ( ctgsStack );
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- if ( ! ( contig_array[actg->ctgID].cvg > 0 ) )
- {
- continue;
- }
-
- lenSum += contig_array[actg->ctgID].length;
- cvgSum += contig_array[actg->ctgID].length * contig_array[actg->ctgID].cvg;
- }
-
- if ( lenSum > 0 )
- {
- fprintf ( fo, ">scaffold%d %4.1f\n", index, ( double ) cvgSum / lenSum );
- }
- else
- {
- fprintf ( fo, ">scaffold%d 0.0\n", index );
- }
-
- fprintf ( foc, ">scaffold%d\n", index );
- stackRecover ( ctgsStack );
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- ctg = actg->ctgID;
- bal_ctg = getTwinCtg ( ctg );
- length = contig_array[ctg].length + overlaplen;
-
- if ( prevCtg && actg->scaftig_start )
- {
- gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
- gapN = gapN > 0 ? gapN : 1;
- outputNs ( fo, gapN, &column );
- ctg_start_pos += gapN;
- //outputGapInfo(prevCtg->ctgID,ctg);
- Ncounter++;
- }
-
- if ( !prevCtg )
- {
- start = 0;
- }
- else
- {
- start = actg->cutHead;
- }
-
- outputlen = length - start - actg->cutTail;
-
- if ( contig_array[ctg].seq )
- {
- outputTightStr ( fo, contig_array[ctg].seq, start, length, outputlen, 0, &column );
- lu_end = start + outputlen > length ? length : start + outputlen;
- lu_len = lu_end - start;
- strand = '+';
- fprintf ( foc, "%d\t", index_array[ctg] );
- }
- else if ( contig_array[bal_ctg].seq )
- {
- outputTightStr ( fo, contig_array[bal_ctg].seq, start, length, outputlen, 1, &column );
- lu_end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
- lu_len = length - start - lu_end;
- strand = '-';
- fprintf ( foc, "%d\t", index_array[bal_ctg] );
- }
-
- fprintf ( foc, "%u\t%c\t%d\n", ctg_start_pos, strand, lu_len );
- ctg_start_pos += lu_len;
-
- if ( actg->gapSeqLen < 1 )
- {
- prevCtg = actg;
- continue;
- }
-
- pt = ( char * ) darrayPut ( gapSeqArray, actg->gapSeqOffset );
- outputTightStrLowerCase ( fo, pt, 0, actg->gapSeqLen, actg->gapSeqLen, 0, &column );
- ctg_start_pos = ctg_start_pos + actg->gapSeqLen;
- prevCtg = actg;
- }
-
- if ( column % 100 != 0 )
- {
- fprintf ( fo, "\n" );
- }
-
- if ( visual )
- {
- scaffBuffer = ( char * ) ckalloc ( ( ctg_start_pos + 5 ) * sizeof ( char ) );
- prevCtg = NULL;
- column = 0;
- ctg_start_pos = 0;
- lenSum = 0;
- stackRecover ( ctgsStack );
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- ctg = actg->ctgID;
- bal_ctg = getTwinCtg ( ctg );
- length = contig_array[ctg].length + overlaplen;
-
- if ( prevCtg && actg->scaftig_start )
- {
- gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
- gapN = gapN > 0 ? gapN : 1;
- outputNs2 ( scaffBuffer, gapN, &column );
- ctg_start_pos += gapN;
- // Ncounter++;
- }
-
- if ( !prevCtg )
- {
- start = 0;
- }
- else
- {
- start = actg->cutHead;
- }
-
- outputlen = length - start - actg->cutTail;
-
- if ( contig_array[ctg].seq )
- {
- t = ++contig_index_array[index_array[ctg]];
- outputTightStr2 ( contig_array[ctg].seq, scaffBuffer, start, length, outputlen, 0, &column );
- lu_end = start + outputlen > length ? length : start + outputlen;
- lu_len = lu_end - start;
- strand = '+';
- // fprintf (foc, "%d\t", index_array[ctg]);
- lenSum++;
-
- if ( ctg_start_pos - start >= 0 )
- {
- pos_start[lenSum] = ctg_start_pos - start;
- pos_end[lenSum] = ctg_start_pos + length - start;
- orien_array[lenSum] = '+';
-
- if ( t == 1 ) { sprintf ( index_contig[lenSum], "%u", index_array[ctg] ); }
- else { sprintf ( index_contig[lenSum], "%u-%d", index_array[ctg], t - 1 ); }
-
- fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
- outputTightStr2Visual ( foc2, ctg, & ( contig_index_array[index_array[ctg]] ), 0, length, length, 0 );
- }
- else
- {
- pos_start[lenSum] = 0;
- pos_end[lenSum] = ctg_start_pos + length - start;
- orien_array[lenSum] = '+';
-
- if ( t == 1 ) { sprintf ( index_contig[lenSum], "%u", index_array[ctg] ); }
- else { sprintf ( index_contig[lenSum], "%u-%d", index_array[ctg], t - 1 ); }
-
- fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
- outputTightStr2Visual ( foc2, ctg, & ( contig_index_array[index_array[ctg]] ), start - ctg_start_pos, length, length - start + ctg_start_pos, 0 );
- }
- }
- else if ( contig_array[bal_ctg].seq )
- {
- t = ++contig_index_array[index_array[bal_ctg]];
- outputTightStr2 ( contig_array[bal_ctg].seq, scaffBuffer, start, length, outputlen, 1, &column );
- lu_end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
- lu_len = length - start - lu_end;
- strand = '-';
- // fprintf (foc, "%d\t", index_array[bal_ctg]);
- lenSum++;
-
- if ( ctg_start_pos - start >= 0 )
- {
- pos_start[lenSum] = ctg_start_pos - start;
- pos_end[lenSum] = ctg_start_pos + length - start;
- orien_array[lenSum] = '-';
-
- if ( t == 1 ) { sprintf ( index_contig[lenSum], "%u", index_array[bal_ctg] ); }
- else { sprintf ( index_contig[lenSum], "%u-%d", index_array[bal_ctg], t - 1 ); }
-
- fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
- outputTightStr2Visual ( foc2, bal_ctg, & ( contig_index_array[index_array[bal_ctg]] ), 0, length, length, 1 );
- }
- else
- {
- pos_start[lenSum] = ctg_start_pos;
- pos_end[lenSum] = ctg_start_pos + lu_len;
- orien_array[lenSum] = '-';
-
- if ( t == 1 ) { sprintf ( index_contig[lenSum], "%u", index_array[bal_ctg] ); }
- else { sprintf ( index_contig[lenSum], "%u-%d", index_array[bal_ctg], t - 1 ); }
-
- fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
- outputTightStr2Visual ( foc2, bal_ctg, & ( contig_index_array[index_array[bal_ctg]] ), start, length, outputlen, 1 );
- }
- }
-
- // fprintf (foc, "%u\t%c\t%d\n", ctg_start_pos, strand, lu_len);
- ctg_start_pos += lu_len;
-
- if ( actg->gapSeqLen < 1 )
- {
- prevCtg = actg;
- continue;
- }
-
- pt = ( char * ) darrayPut ( gapSeqArray, actg->gapSeqOffset );
- outputTightStrLowerCase2 ( pt, scaffBuffer, 0, actg->gapSeqLen, actg->gapSeqLen, 0, &column );
- outputTightStrLowerCase2Visual ( foc2, gapNum, pt, 0, actg->gapSeqLen, actg->gapSeqLen );
- fprintf ( fo3, "{AFG\nacc:%d-0\nclr:0,%d\n}\n", gapNum, actg->gapSeqLen );
- lenSum++;
- pos_start[lenSum] = ctg_start_pos;
- pos_end[lenSum] = ctg_start_pos + actg->gapSeqLen;
- orien_array[lenSum] = '+';
- sprintf ( index_contig[lenSum], "%d-0", gapNum );
- gapNum++;
- ctg_start_pos = ctg_start_pos + actg->gapSeqLen;
- prevCtg = actg;
- }
-
- scaffNum++;
- fprintf ( fo3, "{CCO\nacc:%d\npla:P\nlen:%u\ncns:\n", scaffNum, ctg_start_pos );
-
- for ( i = 0; i < ctg_start_pos; i++ )
- {
- if ( i != 0 && i % 100 == 0 && i < ctg_start_pos - 1 ) { fprintf ( fo3, "\n" ); }
-
- fprintf ( fo3, "%c", scaffBuffer[i] );
- }
-
- fprintf ( fo3, "\n.\nqlt:\n" );
-
- for ( i = 0; i < ctg_start_pos; i++ )
- {
- if ( i != 0 && i % 100 == 0 && i < ctg_start_pos - 1 ) { fprintf ( fo3, "\n" ); }
-
- fprintf ( fo3, "D" );
- }
-
- fprintf ( fo3, "\n.\nnpc:%d\n", lenSum );
-
- for ( i = 1; i <= lenSum; i++ )
- {
- if ( orien_array[i] == '+' ) { fprintf ( fo3, "{MPS\ntyp:R\nmid:%s\nsrc:\n.\npos:%u,%u\ndln:0\ndel:\n}\n", ( index_contig[i] ), pos_start[i], pos_end[i] ); }
-
- if ( orien_array[i] == '-' ) { fprintf ( fo3, "{MPS\ntyp:R\nmid:%s\nsrc:\n.\npos:%u,%u\ndln:0\ndel:\n}\n", ( index_contig[i] ), pos_end[i], pos_start[i] ); }
- }
-
- fprintf ( fo3, "}\n" );
- free ( ( void * ) scaffBuffer );
- }
-
- free ( ( void * ) pos_start );
- free ( ( void * ) pos_end );
- free ( ( void * ) orien_array );
-
- for ( i = 0; i < 1000000; i++ )
- {
- free ( ( void * ) index_contig[i] );
- }
-
- free ( ( void * ) index_contig );
+ CTGinSCAF *actg, *prevCtg = NULL;
+ unsigned int ctg, bal_ctg, ctg_out, length;
+ int start, outputlen, gapN;
+ char *pt;
+ int column = 0;
+ long long cvgSum = 0;
+ int lenSum = 0;
+ int i, t, lu_len = 0, lu_end = 0;
+ unsigned int ctg_start_pos = 0;
+ char strand;
+ unsigned int *pos_start = ( unsigned int * ) ckalloc ( 1000000 * sizeof ( unsigned int ) );
+ unsigned int *pos_end = ( unsigned int * ) ckalloc ( 1000000 * sizeof ( unsigned int ) );
+ // char index_contig[num_ctg][20];
+ char **index_contig = ( char ** ) ckalloc ( 1000000 * sizeof ( char * ) );
+
+ for ( i = 0; i < 1000000; i++ )
+ {
+ index_contig[i] = ( char * ) ckalloc ( 20 * sizeof ( char ) );
+ }
+
+ char *orien_array;
+ orien_array = ( char * ) ckalloc ( 1000000 * sizeof ( char ) );
+ // scaffBuffer = (char *) ckalloc (300000000 * sizeof (char));
+ stackRecover ( ctgsStack );
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ if ( ! ( contig_array[actg->ctgID].cvg > 0 ) )
+ {
+ continue;
+ }
+
+ lenSum += contig_array[actg->ctgID].length;
+ cvgSum += contig_array[actg->ctgID].length * contig_array[actg->ctgID].cvg;
+ }
+
+ if ( lenSum > 0 )
+ {
+ fprintf ( fo, ">scaffold%d %4.1f\n", index, ( double ) cvgSum / lenSum );
+ }
+ else
+ {
+ fprintf ( fo, ">scaffold%d 0.0\n", index );
+ }
+
+ fprintf ( foc, ">scaffold%d\n", index );
+ stackRecover ( ctgsStack );
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg ( ctg );
+ length = contig_array[ctg].length + overlaplen;
+
+ if ( prevCtg && actg->scaftig_start )
+ {
+ gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
+ gapN = gapN > 0 ? gapN : 1;
+ outputNs ( fo, gapN, &column );
+ ctg_start_pos += gapN;
+ //outputGapInfo(prevCtg->ctgID,ctg);
+ Ncounter++;
+ }
+
+ if ( !prevCtg )
+ {
+ start = 0;
+ }
+ else
+ {
+ start = actg->cutHead;
+ }
+
+ outputlen = length - start - actg->cutTail;
+
+ if ( contig_array[ctg].seq )
+ {
+ outputTightStr ( fo, contig_array[ctg].seq, start, length, outputlen, 0, &column );
+ lu_end = start + outputlen > length ? length : start + outputlen;
+ lu_len = lu_end - start;
+ strand = '+';
+ fprintf ( foc, "%d\t", index_array[ctg] );
+ }
+ else if ( contig_array[bal_ctg].seq )
+ {
+ outputTightStr ( fo, contig_array[bal_ctg].seq, start, length, outputlen, 1, &column );
+ lu_end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+ lu_len = length - start - lu_end;
+ strand = '-';
+ fprintf ( foc, "%d\t", index_array[bal_ctg] );
+ }
+
+ fprintf ( foc, "%u\t%c\t%d\n", ctg_start_pos, strand, lu_len );
+ ctg_start_pos += lu_len;
+
+ if ( actg->gapSeqLen < 1 )
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ pt = ( char * ) darrayPut ( gapSeqArray, actg->gapSeqOffset );
+ outputTightStrLowerCase ( fo, pt, 0, actg->gapSeqLen, actg->gapSeqLen, 0, &column );
+ ctg_start_pos = ctg_start_pos + actg->gapSeqLen;
+ prevCtg = actg;
+ }
+
+ if ( column % 100 != 0 )
+ {
+ fprintf ( fo, "\n" );
+ }
+
+ if ( visual )
+ {
+ scaffBuffer = ( char * ) ckalloc ( ( ctg_start_pos + 5 ) * sizeof ( char ) );
+ prevCtg = NULL;
+ column = 0;
+ ctg_start_pos = 0;
+ lenSum = 0;
+ stackRecover ( ctgsStack );
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg ( ctg );
+ length = contig_array[ctg].length + overlaplen;
+
+ if ( prevCtg && actg->scaftig_start )
+ {
+ gapN = actg->start - prevCtg->start - contig_array[prevCtg->ctgID].length;
+ gapN = gapN > 0 ? gapN : 1;
+ outputNs2 ( scaffBuffer, gapN, &column );
+ ctg_start_pos += gapN;
+ // Ncounter++;
+ }
+
+ if ( !prevCtg )
+ {
+ start = 0;
+ }
+ else
+ {
+ start = actg->cutHead;
+ }
+
+ outputlen = length - start - actg->cutTail;
+
+ if ( contig_array[ctg].seq )
+ {
+ t = ++contig_index_array[index_array[ctg]];
+ outputTightStr2 ( contig_array[ctg].seq, scaffBuffer, start, length, outputlen, 0, &column );
+ lu_end = start + outputlen > length ? length : start + outputlen;
+ lu_len = lu_end - start;
+ strand = '+';
+ // fprintf (foc, "%d\t", index_array[ctg]);
+ lenSum++;
+
+ if ( ctg_start_pos - start >= 0 )
+ {
+ pos_start[lenSum] = ctg_start_pos - start;
+ pos_end[lenSum] = ctg_start_pos + length - start;
+ orien_array[lenSum] = '+';
+
+ if ( t == 1 )
+ {
+ sprintf ( index_contig[lenSum], "%u", index_array[ctg] );
+ }
+ else
+ {
+ sprintf ( index_contig[lenSum], "%u-%d", index_array[ctg], t - 1 );
+ }
+
+ fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
+ outputTightStr2Visual ( foc2, ctg, & ( contig_index_array[index_array[ctg]] ), 0, length, length, 0 );
+ }
+ else
+ {
+ pos_start[lenSum] = 0;
+ pos_end[lenSum] = ctg_start_pos + length - start;
+ orien_array[lenSum] = '+';
+
+ if ( t == 1 )
+ {
+ sprintf ( index_contig[lenSum], "%u", index_array[ctg] );
+ }
+ else
+ {
+ sprintf ( index_contig[lenSum], "%u-%d", index_array[ctg], t - 1 );
+ }
+
+ fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
+ outputTightStr2Visual ( foc2, ctg, & ( contig_index_array[index_array[ctg]] ), start - ctg_start_pos, length, length - start + ctg_start_pos, 0 );
+ }
+ }
+ else if ( contig_array[bal_ctg].seq )
+ {
+ t = ++contig_index_array[index_array[bal_ctg]];
+ outputTightStr2 ( contig_array[bal_ctg].seq, scaffBuffer, start, length, outputlen, 1, &column );
+ lu_end = length - start - outputlen - 1 >= 0 ? length - start - outputlen : 0;
+ lu_len = length - start - lu_end;
+ strand = '-';
+ // fprintf (foc, "%d\t", index_array[bal_ctg]);
+ lenSum++;
+
+ if ( ctg_start_pos - start >= 0 )
+ {
+ pos_start[lenSum] = ctg_start_pos - start;
+ pos_end[lenSum] = ctg_start_pos + length - start;
+ orien_array[lenSum] = '-';
+
+ if ( t == 1 )
+ {
+ sprintf ( index_contig[lenSum], "%u", index_array[bal_ctg] );
+ }
+ else
+ {
+ sprintf ( index_contig[lenSum], "%u-%d", index_array[bal_ctg], t - 1 );
+ }
+
+ fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
+ outputTightStr2Visual ( foc2, bal_ctg, & ( contig_index_array[index_array[bal_ctg]] ), 0, length, length, 1 );
+ }
+ else
+ {
+ pos_start[lenSum] = ctg_start_pos;
+ pos_end[lenSum] = ctg_start_pos + lu_len;
+ orien_array[lenSum] = '-';
+
+ if ( t == 1 )
+ {
+ sprintf ( index_contig[lenSum], "%u", index_array[bal_ctg] );
+ }
+ else
+ {
+ sprintf ( index_contig[lenSum], "%u-%d", index_array[bal_ctg], t - 1 );
+ }
+
+ fprintf ( fo3, "{AFG\nacc:%s\nclr:0,%d\n}\n", ( index_contig[lenSum] ), length );
+ outputTightStr2Visual ( foc2, bal_ctg, & ( contig_index_array[index_array[bal_ctg]] ), start, length, outputlen, 1 );
+ }
+ }
+
+ // fprintf (foc, "%u\t%c\t%d\n", ctg_start_pos, strand, lu_len);
+ ctg_start_pos += lu_len;
+
+ if ( actg->gapSeqLen < 1 )
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ pt = ( char * ) darrayPut ( gapSeqArray, actg->gapSeqOffset );
+ outputTightStrLowerCase2 ( pt, scaffBuffer, 0, actg->gapSeqLen, actg->gapSeqLen, 0, &column );
+ outputTightStrLowerCase2Visual ( foc2, gapNum, pt, 0, actg->gapSeqLen, actg->gapSeqLen );
+ fprintf ( fo3, "{AFG\nacc:%d-0\nclr:0,%d\n}\n", gapNum, actg->gapSeqLen );
+ lenSum++;
+ pos_start[lenSum] = ctg_start_pos;
+ pos_end[lenSum] = ctg_start_pos + actg->gapSeqLen;
+ orien_array[lenSum] = '+';
+ sprintf ( index_contig[lenSum], "%d-0", gapNum );
+ gapNum++;
+ ctg_start_pos = ctg_start_pos + actg->gapSeqLen;
+ prevCtg = actg;
+ }
+
+ scaffNum++;
+ fprintf ( fo3, "{CCO\nacc:%d\npla:P\nlen:%u\ncns:\n", scaffNum, ctg_start_pos );
+
+ for ( i = 0; i < ctg_start_pos; i++ )
+ {
+ if ( i != 0 && i % 100 == 0 && i < ctg_start_pos - 1 )
+ {
+ fprintf ( fo3, "\n" );
+ }
+
+ fprintf ( fo3, "%c", scaffBuffer[i] );
+ }
+
+ fprintf ( fo3, "\n.\nqlt:\n" );
+
+ for ( i = 0; i < ctg_start_pos; i++ )
+ {
+ if ( i != 0 && i % 100 == 0 && i < ctg_start_pos - 1 )
+ {
+ fprintf ( fo3, "\n" );
+ }
+
+ fprintf ( fo3, "D" );
+ }
+
+ fprintf ( fo3, "\n.\nnpc:%d\n", lenSum );
+
+ for ( i = 1; i <= lenSum; i++ )
+ {
+ if ( orien_array[i] == '+' )
+ {
+ fprintf ( fo3, "{MPS\ntyp:R\nmid:%s\nsrc:\n.\npos:%u,%u\ndln:0\ndel:\n}\n", ( index_contig[i] ), pos_start[i], pos_end[i] );
+ }
+
+ if ( orien_array[i] == '-' )
+ {
+ fprintf ( fo3, "{MPS\ntyp:R\nmid:%s\nsrc:\n.\npos:%u,%u\ndln:0\ndel:\n}\n", ( index_contig[i] ), pos_end[i], pos_start[i] );
+ }
+ }
+
+ fprintf ( fo3, "}\n" );
+ free ( ( void * ) scaffBuffer );
+ }
+
+ free ( ( void * ) pos_start );
+ free ( ( void * ) pos_end );
+ free ( ( void * ) orien_array );
+
+ for ( i = 0; i < 1000000; i++ )
+ {
+ free ( ( void * ) index_contig[i] );
+ }
+
+ free ( ( void * ) index_contig );
}
-static void fill1scaf ( int index, STACK * ctgsStack, int thrdID );
+static void fill1scaf ( int index, STACK *ctgsStack, int thrdID );
static void check1scaf ( int t, int thrdID )
{
- if ( flagBuf[t] )
- {
- return;
- }
-
- boolean late = 0;
- pthread_mutex_lock ( &mutex );
-
- if ( !flagBuf[t] )
- {
- flagBuf[t] = 1;
- thrdNoBuf[t] = thrdID;
- }
- else
- {
- late = 1;
- }
-
- pthread_mutex_unlock ( &mutex );
-
- if ( late )
- {
- return;
- }
-
- counters[thrdID]++;
- fill1scaf ( scafCounter + t + 1, ctgStackBuffer[t], thrdID );
+ if ( flagBuf[t] )
+ {
+ return;
+ }
+
+ boolean late = 0;
+ pthread_mutex_lock ( &mutex );
+
+ if ( !flagBuf[t] )
+ {
+ flagBuf[t] = 1;
+ thrdNoBuf[t] = thrdID;
+ }
+ else
+ {
+ late = 1;
+ }
+
+ pthread_mutex_unlock ( &mutex );
+
+ if ( late )
+ {
+ return;
+ }
+
+ counters[thrdID]++;
+ fill1scaf ( scafCounter + t + 1, ctgStackBuffer[t], thrdID );
}
-static void fill1scaf ( int index, STACK * ctgsStack, int thrdID )
+static void fill1scaf ( int index, STACK *ctgsStack, int thrdID )
{
- CTGinSCAF * actg, *prevCtg = NULL;
- READNEARBY * rdArray, *rdArray4gap, *rd;
- int numRd = 0, count, maxGLen = 0;
- unsigned int ctg, bal_ctg;
- STACK * rdStack;
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- if ( prevCtg )
- {
- maxGLen = maxGLen < ( actg->start - prevCtg->end ) ? ( actg->start - prevCtg->end ) : maxGLen;
- }
-
- ctg = actg->ctgID;
- bal_ctg = getTwinCtg ( ctg );
-
- if ( actg->mask )
- {
- prevCtg = actg;
- continue;
- }
-
- if ( contig_array[ctg].closeReads )
- {
- numRd += contig_array[ctg].closeReads->item_c;
- }
- else if ( contig_array[bal_ctg].closeReads )
- {
- numRd += contig_array[bal_ctg].closeReads->item_c;
- }
-
- prevCtg = actg;
- }
-
- if ( numRd < 1 )
- {
- return;
- }
-
- rdArray = ( READNEARBY * ) ckalloc ( numRd * sizeof ( READNEARBY ) );
- rdArray4gap = ( READNEARBY * ) ckalloc ( numRd * sizeof ( READNEARBY ) );
- //fprintf(stderr,"scaffold%d reads4gap %d\n",index,numRd);
- // collect reads appended to contigs in this scaffold
- int numRd2 = 0;
- stackRecover ( ctgsStack );
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- ctg = actg->ctgID;
- bal_ctg = getTwinCtg ( ctg );
-
- if ( actg->mask )
- {
- continue;
- }
-
- if ( contig_array[ctg].closeReads )
- {
- rdStack = contig_array[ctg].closeReads;
- }
- else if ( contig_array[bal_ctg].closeReads )
- {
- rdStack = contig_array[bal_ctg].closeReads;
- }
- else
- {
- continue;
- }
-
- stackBackup ( rdStack );
-
- while ( ( rd = ( READNEARBY * ) stackPop ( rdStack ) ) != NULL )
- {
- rdArray[numRd2].len = rd->len;
- rdArray[numRd2].seqStarter = rd->seqStarter;
-
- if ( isSmallerThanTwin ( ctg ) )
- {
- rdArray[numRd2++].dis = actg->start - overlaplen + rd->dis;
- }
- else
- { rdArray[numRd2++].dis = actg->start - overlaplen + contig_array[ctg].length - rd->len - rd->dis; }
- }
-
- stackRecover ( rdStack );
- }
-
- if ( numRd2 != numRd )
- {
- fprintf ( stderr, "##reads numbers doesn't match, %d vs %d when scaffold %d.\n", numRd, numRd2, index );
- }
-
- qsort ( rdArray, numRd, sizeof ( READNEARBY ), cmp_reads );
- //fill gap one by one
- int gapStart, gapEnd;
- int numIn = 0;
- boolean flag;
- int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
- int maxGSLen = maxGLen + GLDiff < 10 ? 10 : maxGLen + GLDiff;
- //fprintf(stderr,"maxGlen %d, maxGSlen %d\n",maxGLen,maxGSLen);
- char * seqGap = ( char * ) ckalloc ( maxGSLen * sizeof ( char ) ); // temp array for gap sequence
- Kmer * kmerCtg1 = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- Kmer * kmerCtg2 = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- char * seqCtg1 = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- char * seqCtg2 = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
- prevCtg = NULL;
- stackRecover ( ctgsStack );
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- if ( !prevCtg || !actg->scaftig_start )
- {
- prevCtg = actg;
- continue;
- }
-
- gapStart = prevCtg->end - 100;
- gapEnd = actg->start - overlaplen + 100;
- cutRdArray ( rdArray, gapStart, gapEnd, &count, numRd, rdArray4gap );
- numIn += count;
- /*
- if(!count){
- prevCtg = actg;
- continue;
- }
- */
- int overlap;
-
- for ( overlap = overlaplen; overlap > 14; overlap -= 2 )
- {
- flag = localGraph ( rdArray4gap, count, prevCtg, actg, overlaplen, kmerCtg1, kmerCtg2, overlap, darrayBuf[thrdID], seqCtg1, seqCtg2, seqGap );
-
- //free_kmerset(kmerSet);
-
- if ( flag == 1 )
- {
- /*
- fprintf(stderr,"Between ctg %d and %d, Found with %d\n",prevCtg->ctgID
- ,actg->ctgID,overlap);
- */
- break;
- }
- }
-
- /*
- if(count==0)
- printf("Gap closed without reads\n");
- if(!flag)
- fprintf(stderr,"Between ctg %d and %d, NO routes found\n",prevCtg->ctgID,actg->ctgID);
- */
- prevCtg = actg;
- }
-
- //fprintf(stderr,"____scaffold%d reads in gap %d\n",index,numIn);
- free ( ( void * ) seqGap );
- free ( ( void * ) kmerCtg1 );
- free ( ( void * ) kmerCtg2 );
- free ( ( void * ) seqCtg1 );
- free ( ( void * ) seqCtg2 );
- free ( ( void * ) rdArray );
- free ( ( void * ) rdArray4gap );
+ CTGinSCAF *actg, *prevCtg = NULL;
+ READNEARBY *rdArray, *rdArray4gap, *rd;
+ int numRd = 0, count, maxGLen = 0;
+ unsigned int ctg, bal_ctg;
+ STACK *rdStack;
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ if ( prevCtg )
+ {
+ maxGLen = maxGLen < ( actg->start - prevCtg->end ) ? ( actg->start - prevCtg->end ) : maxGLen;
+ }
+
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg ( ctg );
+
+ if ( actg->mask )
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ if ( contig_array[ctg].closeReads )
+ {
+ numRd += contig_array[ctg].closeReads->item_c;
+ }
+ else if ( contig_array[bal_ctg].closeReads )
+ {
+ numRd += contig_array[bal_ctg].closeReads->item_c;
+ }
+
+ prevCtg = actg;
+ }
+
+ if ( numRd < 1 )
+ {
+ return;
+ }
+
+ rdArray = ( READNEARBY * ) ckalloc ( numRd * sizeof ( READNEARBY ) );
+ rdArray4gap = ( READNEARBY * ) ckalloc ( numRd * sizeof ( READNEARBY ) );
+ //fprintf(stderr,"scaffold%d reads4gap %d\n",index,numRd);
+ // collect reads appended to contigs in this scaffold
+ int numRd2 = 0;
+ stackRecover ( ctgsStack );
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg ( ctg );
+
+ if ( actg->mask )
+ {
+ continue;
+ }
+
+ if ( contig_array[ctg].closeReads )
+ {
+ rdStack = contig_array[ctg].closeReads;
+ }
+ else if ( contig_array[bal_ctg].closeReads )
+ {
+ rdStack = contig_array[bal_ctg].closeReads;
+ }
+ else
+ {
+ continue;
+ }
+
+ stackBackup ( rdStack );
+
+ while ( ( rd = ( READNEARBY * ) stackPop ( rdStack ) ) != NULL )
+ {
+ rdArray[numRd2].len = rd->len;
+ rdArray[numRd2].seqStarter = rd->seqStarter;
+
+ if ( isSmallerThanTwin ( ctg ) )
+ {
+ rdArray[numRd2++].dis = actg->start - overlaplen + rd->dis;
+ }
+ else
+ {
+ rdArray[numRd2++].dis = actg->start - overlaplen + contig_array[ctg].length - rd->len - rd->dis;
+ }
+ }
+
+ stackRecover ( rdStack );
+ }
+
+ if ( numRd2 != numRd )
+ {
+ fprintf ( stderr, "##reads numbers doesn't match, %d vs %d when scaffold %d.\n", numRd, numRd2, index );
+ }
+
+ qsort ( rdArray, numRd, sizeof ( READNEARBY ), cmp_reads );
+ //fill gap one by one
+ int gapStart, gapEnd;
+ int numIn = 0;
+ boolean flag;
+ int buffer_size = maxReadLen > 100 ? maxReadLen : 100;
+ int maxGSLen = maxGLen + GLDiff < 10 ? 10 : maxGLen + GLDiff;
+ //fprintf(stderr,"maxGlen %d, maxGSlen %d\n",maxGLen,maxGSLen);
+ char *seqGap = ( char * ) ckalloc ( maxGSLen * sizeof ( char ) ); // temp array for gap sequence
+ Kmer *kmerCtg1 = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ Kmer *kmerCtg2 = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ char *seqCtg1 = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ char *seqCtg2 = ( char * ) ckalloc ( buffer_size * sizeof ( char ) );
+ prevCtg = NULL;
+ stackRecover ( ctgsStack );
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ if ( !prevCtg || !actg->scaftig_start )
+ {
+ prevCtg = actg;
+ continue;
+ }
+
+ gapStart = prevCtg->end - 100;
+ gapEnd = actg->start - overlaplen + 100;
+ cutRdArray ( rdArray, gapStart, gapEnd, &count, numRd, rdArray4gap );
+ numIn += count;
+ /*
+ if(!count){
+ prevCtg = actg;
+ continue;
+ }
+ */
+ int overlap;
+
+ for ( overlap = overlaplen; overlap > 14; overlap -= 2 )
+ {
+ flag = localGraph ( rdArray4gap, count, prevCtg, actg, overlaplen, kmerCtg1, kmerCtg2, overlap, darrayBuf[thrdID], seqCtg1, seqCtg2, seqGap );
+
+ //free_kmerset(kmerSet);
+
+ if ( flag == 1 )
+ {
+ /*
+ fprintf(stderr,"Between ctg %d and %d, Found with %d\n",prevCtg->ctgID
+ ,actg->ctgID,overlap);
+ */
+ break;
+ }
+ }
+
+ /*
+ if(count==0)
+ printf("Gap closed without reads\n");
+ if(!flag)
+ fprintf(stderr,"Between ctg %d and %d, NO routes found\n",prevCtg->ctgID,actg->ctgID);
+ */
+ prevCtg = actg;
+ }
+
+ //fprintf(stderr,"____scaffold%d reads in gap %d\n",index,numIn);
+ free ( ( void * ) seqGap );
+ free ( ( void * ) kmerCtg1 );
+ free ( ( void * ) kmerCtg2 );
+ free ( ( void * ) seqCtg1 );
+ free ( ( void * ) seqCtg2 );
+ free ( ( void * ) rdArray );
+ free ( ( void * ) rdArray4gap );
}
-static void reverseStack ( STACK * dStack, STACK * sStack )
+static void reverseStack ( STACK *dStack, STACK *sStack )
{
- CTGinSCAF * actg, *ctgPt;
- emptyStack ( dStack );
-
- while ( ( actg = ( CTGinSCAF * ) stackPop ( sStack ) ) != NULL )
- {
- ctgPt = ( CTGinSCAF * ) stackPush ( dStack );
- ctgPt->ctgID = actg->ctgID;
- ctgPt->start = actg->start;
- ctgPt->end = actg->end;
- ctgPt->scaftig_start = actg->scaftig_start;
- ctgPt->mask = actg->mask;
- ctgPt->cutHead = actg->cutHead;
- ctgPt->cutTail = actg->cutTail;
- ctgPt->gapSeqLen = actg->gapSeqLen;
- ctgPt->gapSeqOffset = actg->gapSeqOffset;
- }
-
- stackBackup ( dStack );
+ CTGinSCAF *actg, *ctgPt;
+ emptyStack ( dStack );
+
+ while ( ( actg = ( CTGinSCAF * ) stackPop ( sStack ) ) != NULL )
+ {
+ ctgPt = ( CTGinSCAF * ) stackPush ( dStack );
+ ctgPt->ctgID = actg->ctgID;
+ ctgPt->start = actg->start;
+ ctgPt->end = actg->end;
+ ctgPt->scaftig_start = actg->scaftig_start;
+ ctgPt->mask = actg->mask;
+ ctgPt->cutHead = actg->cutHead;
+ ctgPt->cutTail = actg->cutTail;
+ ctgPt->gapSeqLen = actg->gapSeqLen;
+ ctgPt->gapSeqOffset = actg->gapSeqOffset;
+ }
+
+ stackBackup ( dStack );
}
#ifdef MER127
-static Kmer tightStr2Kmer ( char * tightStr, int start, int length, int revS )
+static Kmer tightStr2Kmer ( char *tightStr, int start, int length, int revS )
{
- int i;
- Kmer word;
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
-
- if ( !revS )
- {
- if ( start + overlaplen > length )
- {
- fprintf ( stderr, "The tightStr2Kmer A: no enough bases for kmer.\n" );
- return word;
- }
-
- for ( i = start; i < start + overlaplen; i++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= getCharInTightString ( tightStr, i );
- }
- }
- else
- {
- if ( length - start - overlaplen < 0 )
- {
- fprintf ( stderr, "The tightStr2Kmer B: no enough bases for kmer.\n" );
- return word;
- }
-
- for ( i = length - 1 - start; i > length - 1 - start - overlaplen; i-- )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= int_comp ( getCharInTightString ( tightStr, i ) );
- }
- }
-
- return word;
+ int i;
+ Kmer word;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
+
+ if ( !revS )
+ {
+ if ( start + overlaplen > length )
+ {
+ fprintf ( stderr, "The tightStr2Kmer A: no enough bases for kmer.\n" );
+ return word;
+ }
+
+ for ( i = start; i < start + overlaplen; i++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= getCharInTightString ( tightStr, i );
+ }
+ }
+ else
+ {
+ if ( length - start - overlaplen < 0 )
+ {
+ fprintf ( stderr, "The tightStr2Kmer B: no enough bases for kmer.\n" );
+ return word;
+ }
+
+ for ( i = length - 1 - start; i > length - 1 - start - overlaplen; i-- )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= int_comp ( getCharInTightString ( tightStr, i ) );
+ }
+ }
+
+ return word;
}
static Kmer maxKmer ()
{
- Kmer word;
- word.high1 = word.low1 = word.high2 = word.low2 = 0;
- int i;
+ Kmer word;
+ word.high1 = word.low1 = word.high2 = word.low2 = 0;
+ int i;
- for ( i = 0; i < overlaplen; i++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= 0x3;
- }
+ for ( i = 0; i < overlaplen; i++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= 0x3;
+ }
- return word;
+ return word;
}
#else
-static Kmer tightStr2Kmer ( char * tightStr, int start, int length, int revS )
+static Kmer tightStr2Kmer ( char *tightStr, int start, int length, int revS )
{
- int i;
- Kmer word;
- word.high = word.low = 0;
-
- if ( !revS )
- {
- if ( start + overlaplen > length )
- {
- fprintf ( stderr, "The tightStr2Kmer A: no enough bases for kmer.\n" );
- return word;
- }
-
- for ( i = start; i < start + overlaplen; i++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= getCharInTightString ( tightStr, i );
- }
- }
- else
- {
- if ( length - start - overlaplen < 0 )
- {
- fprintf ( stderr, "The tightStr2Kmer B: no enough bases for kmer.\n" );
- return word;
- }
-
- for ( i = length - 1 - start; i > length - 1 - start - overlaplen; i-- )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= int_comp ( getCharInTightString ( tightStr, i ) );
- }
- }
-
- return word;
+ int i;
+ Kmer word;
+ word.high = word.low = 0;
+
+ if ( !revS )
+ {
+ if ( start + overlaplen > length )
+ {
+ fprintf ( stderr, "The tightStr2Kmer A: no enough bases for kmer.\n" );
+ return word;
+ }
+
+ for ( i = start; i < start + overlaplen; i++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= getCharInTightString ( tightStr, i );
+ }
+ }
+ else
+ {
+ if ( length - start - overlaplen < 0 )
+ {
+ fprintf ( stderr, "The tightStr2Kmer B: no enough bases for kmer.\n" );
+ return word;
+ }
+
+ for ( i = length - 1 - start; i > length - 1 - start - overlaplen; i-- )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= int_comp ( getCharInTightString ( tightStr, i ) );
+ }
+ }
+
+ return word;
}
static Kmer maxKmer ()
{
- Kmer word;
- word.high = word.low = 0;
- int i;
+ Kmer word;
+ word.high = word.low = 0;
+ int i;
- for ( i = 0; i < overlaplen; i++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= 0x3;
- }
+ for ( i = 0; i < overlaplen; i++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= 0x3;
+ }
- return word;
+ return word;
}
#endif
static int contigCatch ( unsigned int prev_ctg, unsigned int ctg )
{
- if ( contig_array[prev_ctg].length == 0 || contig_array[ctg].length == 0 )
- {
- return 0;
- }
-
- Kmer kmerAtEnd, kmerAtStart;
- Kmer MaxKmer;
- unsigned int bal_ctg1 = getTwinCtg ( prev_ctg );
- unsigned int bal_ctg2 = getTwinCtg ( ctg );
- int i, start;
- int len1 = contig_array[prev_ctg].length + overlaplen;
- int len2 = contig_array[ctg].length + overlaplen;
- start = contig_array[prev_ctg].length;
-
- if ( contig_array[prev_ctg].seq )
- {
- kmerAtEnd = tightStr2Kmer ( contig_array[prev_ctg].seq, start, len1, 0 );
- }
- else
- {
- kmerAtEnd = tightStr2Kmer ( contig_array[bal_ctg1].seq, start, len1, 1 );
- }
-
- start = 0;
-
- if ( contig_array[ctg].seq )
- {
- kmerAtStart = tightStr2Kmer ( contig_array[ctg].seq, start, len2, 0 );
- }
- else
- {
- kmerAtStart = tightStr2Kmer ( contig_array[bal_ctg2].seq, start, len2, 1 );
- }
-
- MaxKmer = MAXKMER;
-
- for ( i = 0; i < 10; i++ )
- {
- if ( KmerEqual ( kmerAtStart, kmerAtEnd ) )
- {
- break;
- }
-
- MaxKmer = KmerRightBitMoveBy2 ( MaxKmer );
- kmerAtEnd = KmerAnd ( kmerAtEnd, MaxKmer );
- kmerAtStart = KmerRightBitMoveBy2 ( kmerAtStart );
- }
-
- if ( i < 10 )
- {
- return overlaplen - i;
- }
- else
- {
- return 0;
- }
+ if ( contig_array[prev_ctg].length == 0 || contig_array[ctg].length == 0 )
+ {
+ return 0;
+ }
+
+ Kmer kmerAtEnd, kmerAtStart;
+ Kmer MaxKmer;
+ unsigned int bal_ctg1 = getTwinCtg ( prev_ctg );
+ unsigned int bal_ctg2 = getTwinCtg ( ctg );
+ int i, start;
+ int len1 = contig_array[prev_ctg].length + overlaplen;
+ int len2 = contig_array[ctg].length + overlaplen;
+ start = contig_array[prev_ctg].length;
+
+ if ( contig_array[prev_ctg].seq )
+ {
+ kmerAtEnd = tightStr2Kmer ( contig_array[prev_ctg].seq, start, len1, 0 );
+ }
+ else
+ {
+ kmerAtEnd = tightStr2Kmer ( contig_array[bal_ctg1].seq, start, len1, 1 );
+ }
+
+ start = 0;
+
+ if ( contig_array[ctg].seq )
+ {
+ kmerAtStart = tightStr2Kmer ( contig_array[ctg].seq, start, len2, 0 );
+ }
+ else
+ {
+ kmerAtStart = tightStr2Kmer ( contig_array[bal_ctg2].seq, start, len2, 1 );
+ }
+
+ MaxKmer = MAXKMER;
+
+ for ( i = 0; i < 10; i++ )
+ {
+ if ( KmerEqual ( kmerAtStart, kmerAtEnd ) )
+ {
+ break;
+ }
+
+ MaxKmer = KmerRightBitMoveBy2 ( MaxKmer );
+ kmerAtEnd = KmerAnd ( kmerAtEnd, MaxKmer );
+ kmerAtStart = KmerRightBitMoveBy2 ( kmerAtStart );
+ }
+
+ if ( i < 10 )
+ {
+ return overlaplen - i;
+ }
+ else
+ {
+ return 0;
+ }
}
-static void initStackBuf ( STACK ** ctgStackBuffer, int scafBufSize )
+static void initStackBuf ( STACK **ctgStackBuffer, int scafBufSize )
{
- int i;
+ int i;
- for ( i = 0; i < scafBufSize; i++ )
- {
- flagBuf[i] = 1;
- ctgStackBuffer[i] = ( STACK * ) createStack ( 100, sizeof ( CTGinSCAF ) );
- }
+ for ( i = 0; i < scafBufSize; i++ )
+ {
+ flagBuf[i] = 1;
+ ctgStackBuffer[i] = ( STACK * ) createStack ( 100, sizeof ( CTGinSCAF ) );
+ }
}
-static void freeStackBuf ( STACK ** ctgStackBuffer, int scafBufSize )
+static void freeStackBuf ( STACK **ctgStackBuffer, int scafBufSize )
{
- int i;
+ int i;
- for ( i = 0; i < scafBufSize; i++ )
- {
- freeStack ( ctgStackBuffer[i] );
- }
+ for ( i = 0; i < scafBufSize; i++ )
+ {
+ freeStack ( ctgStackBuffer[i] );
+ }
}
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i;
- prm = ( PARAMETER * ) para;
-
- //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- emptyDarray ( darrayBuf[prm->threadID] );
-
- for ( i = 0; i < scafInBuf; i++ )
- {
- check1scaf ( i, prm->threadID );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 2 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ int i;
+ prm = ( PARAMETER * ) para;
+
+ //printf("%dth thread with threadID %d, hash_table %p\n",id,prm.threadID,prm.hash_table);
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ emptyDarray ( darrayBuf[prm->threadID] );
+
+ for ( i = 0; i < scafInBuf; i++ )
+ {
+ check1scaf ( i, prm->threadID );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 2 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+
+ usleep ( 1 );
+ }
}
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- {
- thrdSignals[t + 1] = SIG;
- }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- {
- break;
- }
-
- if ( t == thrd_num )
- {
- break;
- }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- {
- pthread_join ( threads[i], NULL );
- }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
-static void outputSeqs ( FILE * fo, FILE * foc, FILE * foc2, FILE * fo2, FILE * fo3, int scafInBuf )
+static void outputSeqs ( FILE *fo, FILE *foc, FILE *foc2, FILE *fo2, FILE *fo3, int scafInBuf )
{
- int i, thrdID;
-
- for ( i = 0; i < scafInBuf; i++ )
- {
- thrdID = thrdNoBuf[i];
- outputScafSeq ( fo, foc, foc2, fo3, scafCounter + i + 1, ctgStackBuffer[i], darrayBuf[thrdID] );
- outputGapSeq ( fo2, scafCounter + i + 1, ctgStackBuffer[i], darrayBuf[thrdID] );
- }
+ int i, thrdID;
+
+ for ( i = 0; i < scafInBuf; i++ )
+ {
+ thrdID = thrdNoBuf[i];
+ outputScafSeq ( fo, foc, foc2, fo3, scafCounter + i + 1, ctgStackBuffer[i], darrayBuf[thrdID] );
+ outputGapSeq ( fo2, scafCounter + i + 1, ctgStackBuffer[i], darrayBuf[thrdID] );
+ }
}
static void MaskContig ( unsigned int ctg )
{
- contig_array[ctg].mask = 1;
- contig_array[getTwinCtg ( ctg )].mask = 1;
+ contig_array[ctg].mask = 1;
+ contig_array[getTwinCtg ( ctg )].mask = 1;
}
static void MarkCtgOccu ( unsigned int ctg )
{
- contig_array[ctg].flag = 1;
- contig_array[getTwinCtg ( ctg )].flag = 1;
+ contig_array[ctg].flag = 1;
+ contig_array[getTwinCtg ( ctg )].flag = 1;
}
-static void output_ctg ( unsigned int ctg, FILE * fo )
+static void output_ctg ( unsigned int ctg, FILE *fo )
{
- if ( contig_array[ctg].length < 1 )
- {
- return;
- }
-
- int len;
- unsigned int bal_ctg = getTwinCtg ( ctg );
- len = contig_array[ctg].length + overlaplen;
- int col = 0;
-
- if ( contig_array[ctg].seq )
- {
- fprintf ( fo, ">C%d %4.1f\n", ctg, ( double ) contig_array[ctg].cvg );
- outputTightStr ( fo, contig_array[ctg].seq, 0, len, len, 0, &col );
- }
- else if ( contig_array[bal_ctg].seq )
- {
- fprintf ( fo, ">C%d %4.1f\n", bal_ctg, ( double ) contig_array[ctg].cvg );
- outputTightStr ( fo, contig_array[bal_ctg].seq, 0, len, len, 0, &col );
- }
-
- contig_array[ctg].flag = 1;
- contig_array[bal_ctg].flag = 1;
-
- if ( len % 100 != 0 )
- {
- fprintf ( fo, "\n" );
- }
+ if ( contig_array[ctg].length < 1 )
+ {
+ return;
+ }
+
+ int len;
+ unsigned int bal_ctg = getTwinCtg ( ctg );
+ len = contig_array[ctg].length + overlaplen;
+ int col = 0;
+
+ if ( contig_array[ctg].seq )
+ {
+ fprintf ( fo, ">C%d %4.1f\n", ctg, ( double ) contig_array[ctg].cvg );
+ outputTightStr ( fo, contig_array[ctg].seq, 0, len, len, 0, &col );
+ }
+ else if ( contig_array[bal_ctg].seq )
+ {
+ fprintf ( fo, ">C%d %4.1f\n", bal_ctg, ( double ) contig_array[ctg].cvg );
+ outputTightStr ( fo, contig_array[bal_ctg].seq, 0, len, len, 0, &col );
+ }
+
+ contig_array[ctg].flag = 1;
+ contig_array[bal_ctg].flag = 1;
+
+ if ( len % 100 != 0 )
+ {
+ fprintf ( fo, "\n" );
+ }
}
-void prlReadsCloseGap ( char * graphfile )
+void prlReadsCloseGap ( char *graphfile )
{
- //thrd_num=1;
- if ( fillGap )
- {
- boolean flag;
- fprintf ( stderr, "\nStart to load reads for gap filling. %d length discrepancy is allowed.\n", GLDiff );
- fprintf ( stderr, "...\n" );
- flag = loadReads4gap ( graphfile );
-
- if ( !flag )
- {
- return;
- }
- }
-
- if ( orig2new )
- {
- convertIndex ();
- orig2new = 0;
- }
-
- FILE * fp, *fo, *fo2, *fo3 = NULL, *foc, *foc2 = NULL;
- char line[1024];
- CTGinSCAF * actg;
- STACK * ctgStack, *aStack;
- int index = 0, offset = 0, counter, overallLen;
- int i, starter, prev_start, gapLen, catchable, scafnum;
- unsigned int ctg, prev_ctg = 0;
- boolean IsPrevGap;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
-
- for ( ctg = 1; ctg <= num_ctg; ctg++ )
- {
- contig_array[ctg].flag = 0;
- }
-
- MAXKMER = maxKmer ();
- ctgStack = ( STACK * ) createStack ( 1000, sizeof ( CTGinSCAF ) );
- sprintf ( line, "%s.scaf_gap", graphfile );
- fp = ckopen ( line, "r" );
- sprintf ( line, "%s.scafSeq", graphfile );
- fo = ckopen ( line, "w" );
- sprintf ( line, "%s.contigPosInscaff", graphfile );
- foc = ckopen ( line, "w" );
-
- if ( visual )
- {
- sprintf ( line, "%s.contig4asm", graphfile );
- foc2 = ckopen ( line, "w" );
- sprintf ( line, "%s.asm", graphfile );
- fo3 = ckopen ( line, "w" );
- }
-
- sprintf ( line, "%s.gapSeq", graphfile );
- fo2 = ckopen ( line, "w" );
- pthread_mutex_init ( &mutex, NULL );
- flagBuf = ( boolean * ) ckalloc ( scafBufSize * sizeof ( boolean ) );;
- thrdNoBuf = ( unsigned char * ) ckalloc ( scafBufSize * sizeof ( unsigned char ) );;
- memset ( thrdNoBuf, 0, scafBufSize * sizeof ( char ) );
- ctgStackBuffer = ( STACK ** ) ckalloc ( scafBufSize * sizeof ( STACK * ) );
- initStackBuf ( ctgStackBuffer, scafBufSize );
- darrayBuf = ( DARRAY ** ) ckalloc ( thrd_num * sizeof ( DARRAY * ) );
- counters = ( int * ) ckalloc ( thrd_num * sizeof ( int ) );
- contig_index_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
-
- for ( i = 0; i <= num_ctg; i++ )
- {
- contig_index_array[i] = 0;
- }
-
- for ( i = 0; i < thrd_num; i++ )
- {
- counters[i] = 0;
- darrayBuf[i] = ( DARRAY * ) createDarray ( 100000, sizeof ( char ) );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- if ( fillGap )
- {
- creatThrds ( threads, paras );
- }
-
- Ncounter = scafCounter = scafInBuf = allGaps = 0;
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == '>' )
- {
- if ( index )
- {
- aStack = ctgStackBuffer[scafInBuf];
- flagBuf[scafInBuf++] = 0;
- reverseStack ( aStack, ctgStack );
-
- if ( scafInBuf == scafBufSize )
- {
- if ( fillGap )
- {
- sendWorkSignal ( 1, thrdSignal );
- }
-
- outputSeqs ( fo, foc, foc2, fo2, fo3, scafInBuf );
- scafCounter += scafInBuf;
- scafInBuf = 0;
- }
-
- if ( index % 1000 == 0 )
- {
- fprintf ( stderr, "%d scaffolds processed.\n", index );
- }
- }
-
- //read next scaff
- emptyStack ( ctgStack );
- IsPrevGap = offset = prev_ctg = 0;
- sscanf ( line + 9, "%d %d %d", &index, &counter, &overallLen );
- continue;
- }
-
- if ( line[0] == 'G' ) // gap appears
- {
- if ( fillGap )
- {
- gapLen = procGap ( line, ctgStack );
- IsPrevGap = 1;
- }
-
- continue;
- }
-
- if ( line[0] >= '0' && line[0] <= '9' ) // a contig line
- {
- sscanf ( line, "%d %d", &ctg, &starter );
- actg = ( CTGinSCAF * ) stackPush ( ctgStack );
- actg->ctgID = ctg;
-
- if ( contig_array[ctg].flag )
- {
- MaskContig ( ctg );
- }
- else
- {
- MarkCtgOccu ( ctg );
- }
-
- initiateCtgInScaf ( actg );
-
- if ( !prev_ctg )
- {
- actg->cutHead = 0;
- }
- else if ( !IsPrevGap )
- {
- allGaps++;
- }
-
- if ( !IsPrevGap )
- {
- if ( prev_ctg && ( starter - prev_start - ( int ) contig_array[prev_ctg].length ) < ( ( int ) overlaplen * 4 ) )
- {
- /*
- if(fillGap)
- catchable = contigCatch(prev_ctg,ctg);
- else
- */
- catchable = 0;
-
- if ( catchable ) // prev_ctg and ctg overlap **bp
- {
- allGaps--;
- /*
- if(isLargerThanTwin(prev_ctg))
- fprintf(stderr,"%d ####### by_overlap\n",getTwinCtg(prev_ctg));
- else
- fprintf(stderr,"%d ####### by_overlap\n",prev_ctg);
- */
- actg->scaftig_start = 0;
- actg->cutHead = catchable;
- offset += - ( starter - prev_start - contig_array[prev_ctg].length ) + ( overlaplen - catchable );
- }
- else
- {
- actg->scaftig_start = 1;
- }
- }
- else
- {
- actg->scaftig_start = 1;
- }
- }
- else
- {
- offset += - ( starter - prev_start - contig_array[prev_ctg].length ) + gapLen;
- actg->scaftig_start = 0;
- }
-
- actg->start = starter + offset;
- actg->end = actg->start + contig_array[ctg].length - 1;
- actg->mask = contig_array[ctg].mask;
- IsPrevGap = 0;
- prev_ctg = ctg;
- prev_start = starter;
- }
- }
-
- if ( index )
- {
- aStack = ctgStackBuffer[scafInBuf];
- flagBuf[scafInBuf++] = 0;
- reverseStack ( aStack, ctgStack );
-
- if ( fillGap )
- {
- sendWorkSignal ( 1, thrdSignal );
- }
-
- outputSeqs ( fo, foc, foc2, fo2, fo3, scafInBuf );
- }
-
- if ( visual )
- {
- scafnum = scaffNum;
-
- for ( i = 1; i <= scafnum; i++ )
- {
- fprintf ( fo3, "{SCF\nacc:%d\nnoc:0\n{CTP\nct1:%d\nct2:%d\nmea:0\nstd:0\nori:N\n}\n}\n", ++scaffNum, i, i );
- }
- }
-
- if ( fillGap )
- {
- sendWorkSignal ( 2, thrdSignal );
- thread_wait ( threads );
- }
-
- for ( ctg = 1; ctg <= num_ctg; ctg++ )
- {
- if ( ( contig_array[ctg].length + overlaplen ) < 100 || contig_array[ctg].flag )
- {
- continue;
- }
-
- output_ctg ( ctg, fo );
- }
-
- fprintf ( stderr, "\nDone with %d scaffolds, %d gaps finished, %d gaps overall.\n", index, allGaps - Ncounter, allGaps );
- index = 0;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- freeDarray ( darrayBuf[i] );
- index += counters[i];
- }
-
- if ( fillGap )
- {
- fprintf ( stderr, "Threads processed %d scaffolds.\n", index );
- }
-
- free ( ( void * ) darrayBuf );
-
- if ( readSeqInGap )
- {
- freeDarray ( readSeqInGap );
- }
-
- fclose ( fp );
- fclose ( fo );
- fclose ( foc );
- fclose ( fo2 );
-
- if ( visual )
- {
- fclose ( foc2 );
- fclose ( fo3 );
- }
-
- freeStack ( ctgStack );
- freeStackBuf ( ctgStackBuffer, scafBufSize );
- free ( ( void * ) flagBuf );
- free ( ( void * ) thrdNoBuf );
- free ( ( void * ) ctgStackBuffer );
+ //thrd_num=1;
+ if ( fillGap )
+ {
+ boolean flag;
+ fprintf ( stderr, "\nStart to load reads for gap filling. %d length discrepancy is allowed.\n", GLDiff );
+ fprintf ( stderr, "...\n" );
+ flag = loadReads4gap ( graphfile );
+
+ if ( !flag )
+ {
+ return;
+ }
+ }
+
+ if ( orig2new )
+ {
+ convertIndex ();
+ orig2new = 0;
+ }
+
+ FILE *fp, *fo, *fo2, *fo3 = NULL, *foc, *foc2 = NULL;
+ char line[1024];
+ CTGinSCAF *actg;
+ STACK *ctgStack, *aStack;
+ int index = 0, offset = 0, counter, overallLen;
+ int i, starter, prev_start, gapLen, catchable, scafnum;
+ unsigned int ctg, prev_ctg = 0;
+ boolean IsPrevGap;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+
+ for ( ctg = 1; ctg <= num_ctg; ctg++ )
+ {
+ contig_array[ctg].flag = 0;
+ }
+
+ MAXKMER = maxKmer ();
+ ctgStack = ( STACK * ) createStack ( 1000, sizeof ( CTGinSCAF ) );
+ sprintf ( line, "%s.scaf_gap", graphfile );
+ fp = ckopen ( line, "r" );
+ sprintf ( line, "%s.scafSeq", graphfile );
+ fo = ckopen ( line, "w" );
+ sprintf ( line, "%s.contigPosInscaff", graphfile );
+ foc = ckopen ( line, "w" );
+
+ if ( visual )
+ {
+ sprintf ( line, "%s.contig4asm", graphfile );
+ foc2 = ckopen ( line, "w" );
+ sprintf ( line, "%s.asm", graphfile );
+ fo3 = ckopen ( line, "w" );
+ }
+
+ sprintf ( line, "%s.gapSeq", graphfile );
+ fo2 = ckopen ( line, "w" );
+ pthread_mutex_init ( &mutex, NULL );
+ flagBuf = ( boolean * ) ckalloc ( scafBufSize * sizeof ( boolean ) );;
+ thrdNoBuf = ( unsigned char * ) ckalloc ( scafBufSize * sizeof ( unsigned char ) );;
+ memset ( thrdNoBuf, 0, scafBufSize * sizeof ( char ) );
+ ctgStackBuffer = ( STACK ** ) ckalloc ( scafBufSize * sizeof ( STACK * ) );
+ initStackBuf ( ctgStackBuffer, scafBufSize );
+ darrayBuf = ( DARRAY ** ) ckalloc ( thrd_num * sizeof ( DARRAY * ) );
+ counters = ( int * ) ckalloc ( thrd_num * sizeof ( int ) );
+ contig_index_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
+
+ for ( i = 0; i <= num_ctg; i++ )
+ {
+ contig_index_array[i] = 0;
+ }
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ counters[i] = 0;
+ darrayBuf[i] = ( DARRAY * ) createDarray ( 100000, sizeof ( char ) );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ if ( fillGap )
+ {
+ creatThrds ( threads, paras );
+ }
+
+ Ncounter = scafCounter = scafInBuf = allGaps = 0;
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ if ( index )
+ {
+ aStack = ctgStackBuffer[scafInBuf];
+ flagBuf[scafInBuf++] = 0;
+ reverseStack ( aStack, ctgStack );
+
+ if ( scafInBuf == scafBufSize )
+ {
+ if ( fillGap )
+ {
+ sendWorkSignal ( 1, thrdSignal );
+ }
+
+ outputSeqs ( fo, foc, foc2, fo2, fo3, scafInBuf );
+ scafCounter += scafInBuf;
+ scafInBuf = 0;
+ }
+
+ if ( index % 1000 == 0 )
+ {
+ fprintf ( stderr, "%d scaffolds processed.\n", index );
+ }
+ }
+
+ //read next scaff
+ emptyStack ( ctgStack );
+ IsPrevGap = offset = prev_ctg = 0;
+ sscanf ( line + 9, "%d %d %d", &index, &counter, &overallLen );
+ continue;
+ }
+
+ if ( line[0] == 'G' ) // gap appears
+ {
+ if ( fillGap )
+ {
+ gapLen = procGap ( line, ctgStack );
+ IsPrevGap = 1;
+ }
+
+ continue;
+ }
+
+ if ( line[0] >= '0' && line[0] <= '9' ) // a contig line
+ {
+ sscanf ( line, "%d %d", &ctg, &starter );
+ actg = ( CTGinSCAF * ) stackPush ( ctgStack );
+ actg->ctgID = ctg;
+
+ if ( contig_array[ctg].flag )
+ {
+ MaskContig ( ctg );
+ }
+ else
+ {
+ MarkCtgOccu ( ctg );
+ }
+
+ initiateCtgInScaf ( actg );
+
+ if ( !prev_ctg )
+ {
+ actg->cutHead = 0;
+ }
+ else if ( !IsPrevGap )
+ {
+ allGaps++;
+ }
+
+ if ( !IsPrevGap )
+ {
+ if ( prev_ctg && ( starter - prev_start - ( int ) contig_array[prev_ctg].length ) < ( ( int ) overlaplen * 4 ) )
+ {
+ /*
+ if(fillGap)
+ catchable = contigCatch(prev_ctg,ctg);
+ else
+ */
+ catchable = 0;
+
+ if ( catchable ) // prev_ctg and ctg overlap **bp
+ {
+ allGaps--;
+ /*
+ if(isLargerThanTwin(prev_ctg))
+ fprintf(stderr,"%d ####### by_overlap\n",getTwinCtg(prev_ctg));
+ else
+ fprintf(stderr,"%d ####### by_overlap\n",prev_ctg);
+ */
+ actg->scaftig_start = 0;
+ actg->cutHead = catchable;
+ offset += - ( starter - prev_start - contig_array[prev_ctg].length ) + ( overlaplen - catchable );
+ }
+ else
+ {
+ actg->scaftig_start = 1;
+ }
+ }
+ else
+ {
+ actg->scaftig_start = 1;
+ }
+ }
+ else
+ {
+ offset += - ( starter - prev_start - contig_array[prev_ctg].length ) + gapLen;
+ actg->scaftig_start = 0;
+ }
+
+ actg->start = starter + offset;
+ actg->end = actg->start + contig_array[ctg].length - 1;
+ actg->mask = contig_array[ctg].mask;
+ IsPrevGap = 0;
+ prev_ctg = ctg;
+ prev_start = starter;
+ }
+ }
+
+ if ( index )
+ {
+ aStack = ctgStackBuffer[scafInBuf];
+ flagBuf[scafInBuf++] = 0;
+ reverseStack ( aStack, ctgStack );
+
+ if ( fillGap )
+ {
+ sendWorkSignal ( 1, thrdSignal );
+ }
+
+ outputSeqs ( fo, foc, foc2, fo2, fo3, scafInBuf );
+ }
+
+ if ( visual )
+ {
+ scafnum = scaffNum;
+
+ for ( i = 1; i <= scafnum; i++ )
+ {
+ fprintf ( fo3, "{SCF\nacc:%d\nnoc:0\n{CTP\nct1:%d\nct2:%d\nmea:0\nstd:0\nori:N\n}\n}\n", ++scaffNum, i, i );
+ }
+ }
+
+ if ( fillGap )
+ {
+ sendWorkSignal ( 2, thrdSignal );
+ thread_wait ( threads );
+ }
+
+ for ( ctg = 1; ctg <= num_ctg; ctg++ )
+ {
+ if ( ( contig_array[ctg].length + overlaplen ) < 100 || contig_array[ctg].flag )
+ {
+ continue;
+ }
+
+ output_ctg ( ctg, fo );
+ }
+
+ fprintf ( stderr, "\nDone with %d scaffolds, %d gaps finished, %d gaps overall.\n", index, allGaps - Ncounter, allGaps );
+ index = 0;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ freeDarray ( darrayBuf[i] );
+ index += counters[i];
+ }
+
+ if ( fillGap )
+ {
+ fprintf ( stderr, "Threads processed %d scaffolds.\n", index );
+ }
+
+ free ( ( void * ) darrayBuf );
+
+ if ( readSeqInGap )
+ {
+ freeDarray ( readSeqInGap );
+ }
+
+ fclose ( fp );
+ fclose ( fo );
+ fclose ( foc );
+ fclose ( fo2 );
+
+ if ( visual )
+ {
+ fclose ( foc2 );
+ fclose ( fo3 );
+ }
+
+ freeStack ( ctgStack );
+ freeStackBuf ( ctgStackBuffer, scafBufSize );
+ free ( ( void * ) flagBuf );
+ free ( ( void * ) thrdNoBuf );
+ free ( ( void * ) ctgStackBuffer );
}
diff --git a/standardPregraph/read2edge.c b/standardPregraph/read2edge.c
index dcea79b..465761f 100644
--- a/standardPregraph/read2edge.c
+++ b/standardPregraph/read2edge.c
@@ -1,7 +1,7 @@
/*
* read2edge.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -38,76 +38,76 @@ static const Kmer kmerZero = { 0, 0 };
static int buffer_size = 100000000;
//buffer related varibles for chop kmer
static int read_c;
-static char ** rcSeq;
-static char ** seqBuffer;
-static int * lenBuffer;
-static char * seqLine;
+static char **rcSeq;
+static char **seqBuffer;
+static int *lenBuffer;
+static char *seqLine;
static int lLineLen = 5000;
// kmer related variables
static int kmer_c;
-static Kmer * kmerBuffer;
+static Kmer *kmerBuffer;
//static ubyte8 *hashBanBuffer;
-static boolean * smallerBuffer;
-static int * indexArray;
+static boolean *smallerBuffer;
+static int *indexArray;
int nowstep = 0;
int firsttime = 1;
typedef struct fileReadSet
{
- long offset;
- struct fileReadSet * next;
+ long offset;
+ struct fileReadSet *next;
} FILEREADSET;
//record useful reads from parse1readcheck()
static int file_num = 0;
-static char ** file_Name;
+static char **file_Name;
//record file type:1=fa, 2=fq
-static int * file_type;
+static int *file_type;
//record file max read length
-static int * file_maxReadLen;
+static int *file_maxReadLen;
//record reads offset in file
//static long * offset;
-static boolean * nodeBuffer;
+static boolean *nodeBuffer;
-FILE * readSeqFile;
+FILE *readSeqFile;
static int writeFileNo;
-FILE * writeSeqFile;
+FILE *writeSeqFile;
-static void threadRoutine ( void * thrdID );
+static void threadRoutine ( void *thrdID );
static void chopKmer4read ( int t, int threadID );
-static void thread_wait ( pthread_t * threads );
-void Read2edge ( char * libfile, char * graph, int maxk );
-void Read2edge2 ( char * libfile, char * graph, int last, int maxk ); //, boolean keepReadFile);
+static void thread_wait ( pthread_t *threads );
+void Read2edge ( char *libfile, char *graph, int maxk );
+void Read2edge2 ( char *libfile, char *graph, int last, int maxk ); //, boolean keepReadFile);
static void parse1readcheck ( int t );
-static void searchKmer ( int t, KmerSet2 * kset, int threaID );
+static void searchKmer ( int t, KmerSet2 *kset, int threaID );
static void add1Arc2 ( unsigned int from_ed, unsigned int to_ed, unsigned int weight );
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals );
-static void creatThrds ( pthread_t * threads, PARAMETER * paras );
-static void searchKmer1read ( int i, KmerSet2 * kset, int threadID );
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals );
+static void creatThrds ( pthread_t *threads, PARAMETER *paras );
+static void searchKmer1read ( int i, KmerSet2 *kset, int threadID );
struct preArc
{
- unsigned int to_ed;
- unsigned int multiplicity;
- struct preArc * next;
+ unsigned int to_ed;
+ unsigned int multiplicity;
+ struct preArc *next;
};
struct preArc_array_t
{
- struct preArc ** store_pos;
- unsigned int array_sz;
+ struct preArc **store_pos;
+ unsigned int array_sz;
};
//arc array
struct preArc_array_t arc_arr;
-pthread_mutex_t * locks;
+pthread_mutex_t *locks;
/*************************************************
Function:
@@ -124,144 +124,159 @@ Output:
Return:
None.
*************************************************/
-inline void put_preArc ( struct preArc_array_t * arc_arr, unsigned int left_id, unsigned int right_id, unsigned int added_multi )
+inline void put_preArc ( struct preArc_array_t *arc_arr, unsigned int left_id, unsigned int right_id, unsigned int added_multi )
{
- struct preArc * arc = ( arc_arr->store_pos ) [left_id];
-
- if ( !arc )
- {
- ( arc_arr->store_pos ) [left_id] = ( struct preArc * ) malloc ( sizeof ( struct preArc ) );
- arc = ( arc_arr->store_pos ) [left_id];
- arc->to_ed = right_id;
- arc->multiplicity = added_multi;
- arc->next = NULL;
- return;
- }
-
- while ( arc )
- {
- if ( arc->to_ed == right_id )
- {
- arc->multiplicity += added_multi;
- return;
- }
-
- if ( arc->next == NULL ) { break; }
-
- arc = arc->next;
- }
-
- arc->next = ( struct preArc * ) malloc ( sizeof ( struct preArc ) );
- arc->next->to_ed = right_id;
- arc->next->multiplicity = added_multi;
- arc->next->next = NULL;
+ struct preArc *arc = ( arc_arr->store_pos ) [left_id];
+
+ if ( !arc )
+ {
+ ( arc_arr->store_pos ) [left_id] = ( struct preArc * ) malloc ( sizeof ( struct preArc ) );
+ arc = ( arc_arr->store_pos ) [left_id];
+ arc->to_ed = right_id;
+ arc->multiplicity = added_multi;
+ arc->next = NULL;
+ return;
+ }
+
+ while ( arc )
+ {
+ if ( arc->to_ed == right_id )
+ {
+ arc->multiplicity += added_multi;
+ return;
+ }
+
+ if ( arc->next == NULL )
+ {
+ break;
+ }
+
+ arc = arc->next;
+ }
+
+ arc->next = ( struct preArc * ) malloc ( sizeof ( struct preArc ) );
+ arc->next->to_ed = right_id;
+ arc->next->multiplicity = added_multi;
+ arc->next->next = NULL;
}
//Multi thread.
-inline void put_preArc_threaded ( struct preArc_array_t * arc_arr, pthread_mutex_t * locks, unsigned int left_id, unsigned int right_id, unsigned int added_multi )
+inline void put_preArc_threaded ( struct preArc_array_t *arc_arr, pthread_mutex_t *locks, unsigned int left_id, unsigned int right_id, unsigned int added_multi )
{
- pthread_mutex_lock ( &locks[left_id] );
- put_preArc ( arc_arr, left_id, right_id, added_multi );
- pthread_mutex_unlock ( &locks[left_id] );
+ pthread_mutex_lock ( &locks[left_id] );
+ put_preArc ( arc_arr, left_id, right_id, added_multi );
+ pthread_mutex_unlock ( &locks[left_id] );
}
//Init.
-void init_preArc_array ( struct preArc_array_t * arc_array, unsigned int sz )
+void init_preArc_array ( struct preArc_array_t *arc_array, unsigned int sz )
{
- arc_array->array_sz = sz;
- arc_array->store_pos = ( struct preArc ** ) calloc ( sz, sizeof ( struct preArc * ) );
+ arc_array->array_sz = sz;
+ arc_array->store_pos = ( struct preArc ** ) calloc ( sz, sizeof ( struct preArc * ) );
}
-static void creatThrds ( pthread_t * threads, PARAMETER * paras )
+static void creatThrds ( pthread_t *threads, PARAMETER *paras )
{
- unsigned char i;
- int temp;
-
- for ( i = 0; i < thrd_num; i++ )
- {
- //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
- if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
- {
- fprintf ( stderr, "Create threads failed.\n" );
- exit ( 1 );
- }
- }
-
- fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
+ unsigned char i;
+ int temp;
+
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ //printf("to create %dth thread\n",(*(char *)&(threadID[i])));
+ if ( ( temp = pthread_create ( &threads[i], NULL, ( void * ) threadRoutine, & ( paras[i] ) ) ) != 0 )
+ {
+ fprintf ( stderr, "Create threads failed.\n" );
+ exit ( 1 );
+ }
+ }
+
+ fprintf ( stderr, "%d thread(s) initialized.\n", thrd_num );
}
-static void thread_wait ( pthread_t * threads )
+static void thread_wait ( pthread_t *threads )
{
- int i;
+ int i;
- for ( i = 0; i < thrd_num; i++ )
- if ( threads[i] != 0 )
- { pthread_join ( threads[i], NULL ); }
+ for ( i = 0; i < thrd_num; i++ )
+ if ( threads[i] != 0 )
+ {
+ pthread_join ( threads[i], NULL );
+ }
}
-static void sendWorkSignal ( unsigned char SIG, unsigned char * thrdSignals )
+static void sendWorkSignal ( unsigned char SIG, unsigned char *thrdSignals )
{
- int t;
-
- for ( t = 0; t < thrd_num; t++ )
- { thrdSignals[t + 1] = SIG; }
-
- while ( 1 )
- {
- usleep ( 10 );
-
- for ( t = 0; t < thrd_num; t++ )
- if ( thrdSignals[t + 1] )
- { break; }
-
- if ( t == thrd_num )
- { break; }
- }
+ int t;
+
+ for ( t = 0; t < thrd_num; t++ )
+ {
+ thrdSignals[t + 1] = SIG;
+ }
+
+ while ( 1 )
+ {
+ usleep ( 10 );
+
+ for ( t = 0; t < thrd_num; t++ )
+ if ( thrdSignals[t + 1] )
+ {
+ break;
+ }
+
+ if ( t == thrd_num )
+ {
+ break;
+ }
+ }
}
-static void threadRoutine ( void * para )
+static void threadRoutine ( void *para )
{
- PARAMETER * prm;
- int i, t, j, start, finish, k;
- unsigned char id;
- prm = ( PARAMETER * ) para;
- id = prm->threadID;
-
- //printf("%dth thread with task %d, hash_table %p\n",id,prm.task,prm.hash_table);
- while ( 1 )
- {
- if ( * ( prm->selfSignal ) == 1 )
- {
- for ( i = 0; i < read_c; ++i )
- {
- if ( i % thrd_num != id )
- { continue; }
-
- chopKmer4read ( i, id + 1 );
- }
-
- * ( prm->selfSignal ) = 0;
- }
- else if ( * ( prm->selfSignal ) == 3 )
- {
- * ( prm->selfSignal ) = 0;
- break;
- }
- else if ( * ( prm->selfSignal ) == 5 )
- {
- for ( i = 0; i < read_c; ++i )
- {
- if ( i % thrd_num != id )
- { continue; }
-
- searchKmer1read ( i, KmerSetsNew, id );
- }
-
- * ( prm->selfSignal ) = 0;
- }
-
- usleep ( 1 );
- }
+ PARAMETER *prm;
+ int i, t, j, start, finish, k;
+ unsigned char id;
+ prm = ( PARAMETER * ) para;
+ id = prm->threadID;
+
+ //printf("%dth thread with task %d, hash_table %p\n",id,prm.task,prm.hash_table);
+ while ( 1 )
+ {
+ if ( * ( prm->selfSignal ) == 1 )
+ {
+ for ( i = 0; i < read_c; ++i )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ chopKmer4read ( i, id + 1 );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+ else if ( * ( prm->selfSignal ) == 3 )
+ {
+ * ( prm->selfSignal ) = 0;
+ break;
+ }
+ else if ( * ( prm->selfSignal ) == 5 )
+ {
+ for ( i = 0; i < read_c; ++i )
+ {
+ if ( i % thrd_num != id )
+ {
+ continue;
+ }
+
+ searchKmer1read ( i, KmerSetsNew, id );
+ }
+
+ * ( prm->selfSignal ) = 0;
+ }
+
+ usleep ( 1 );
+ }
}
/*************************************************
@@ -278,198 +293,216 @@ Output:
Return:
None.
*************************************************/
-static void searchKmer1read ( int i, KmerSet2 * kset, int threadID )
+static void searchKmer1read ( int i, KmerSet2 *kset, int threadID )
{
- kmer_t2 * node1, *node2;
- struct edgeID * edge1, *edge2;
- unsigned int from_ed, to_ed, temp_from, temp_to;
- boolean found;
- ARC * temp = NULL;
- boolean last = false;
- int t;
-
- for ( t = indexArray[i]; t < indexArray[i + 1] - 1; ++t )
- {
- //get first match
- if ( !last )
- { found = search_kmerset2 ( kset, kmerBuffer[t], &node1 ); }
- else
- {
- found = true;
- node1 = node2;
- }
-
- //get next match
- if ( found )
- {
- found = search_kmerset2 ( kset, kmerBuffer[t + step], &node2 );
-
- if ( found )
- {
- edge1 = node1->edgeId;
-
- while ( edge1 )
- {
- edge2 = node2->edgeId;
-
- while ( edge2 )
- {
- temp_from = edge1->edge;
- temp_to = edge2->edge;
-
- if ( smallerBuffer[t] && smallerBuffer[t + step] )
- {
- if ( ( edge1->flag == 2 || edge1->flag == 4 || edge1->flag == 8 )
- && ( edge2->flag == 3 || edge2->flag == 4 || edge2->flag == 6 ) )
- {
- {
- if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
- {
- pthread_mutex_lock ( &locks[temp_from] );
- edge_array[temp_from].multi = 1;
- pthread_mutex_unlock ( &locks[temp_from] );
- pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
- edge_array[getTwinEdge ( temp_from )].multi = 1;
- pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
- }
- else
- {
- if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
- {
- if ( t == indexArray[i] )
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- else
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- }
- else
- {
- put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
- }
- }
- }
- }
- }
- else if ( smallerBuffer[t] && !smallerBuffer[t + step] )
- {
- if ( ( edge1->flag == 2 || edge1->flag == 4 || edge1->flag == 8 )
- && ( edge2->flag == 1 || edge2->flag == 5 || edge2->flag == 7 ) )
- {
- {
- if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
- {
- pthread_mutex_lock ( &locks[temp_from] );
- edge_array[temp_from].multi = 1;
- pthread_mutex_unlock ( &locks[temp_from] );
- pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
- edge_array[getTwinEdge ( temp_from )].multi = 1;
- pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
- }
- else
- {
- if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
- {
- if ( t == indexArray[i] )
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- else
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- }
- else
- {
- put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
- }
- }
- }
- }
- }
- else if ( !smallerBuffer[t] && smallerBuffer[t + step] )
- {
- if ( ( edge1->flag == 0 || edge1->flag == 5 || edge1->flag == 9 )
- && ( edge2->flag == 3 || edge2->flag == 4 || edge2->flag == 6 ) )
- {
- {
- if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
- {
- pthread_mutex_lock ( &locks[temp_from] );
- edge_array[temp_from].multi = 1;
- pthread_mutex_unlock ( &locks[temp_from] );
- pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
- edge_array[getTwinEdge ( temp_from )].multi = 1;
- pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
- }
- else
- {
- if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
- {
- if ( t == indexArray[i] )
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- else
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- }
- else
- {
- put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
- }
- }
- }
- }
- }
- else if ( !smallerBuffer[t] && !smallerBuffer[t + step] )
- {
- if ( ( edge1->flag == 0 || edge1->flag == 5 || edge1->flag == 9 )
- && ( edge2->flag == 1 || edge2->flag == 5 || edge2->flag == 7 ) )
- {
- {
- if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
- {
- pthread_mutex_lock ( &locks[temp_from] );
- edge_array[temp_from].multi = 1;
- pthread_mutex_unlock ( &locks[temp_from] );
- pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
- edge_array[getTwinEdge ( temp_from )].multi = 1;
- pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
- }
- else
- {
- if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
- {
- if ( t == indexArray[i] )
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- else
- { put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 ); }
- }
- else
- {
- put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
- }
- }
- }
- }
- }
-
- edge2 = edge2->next;
- }
-
- edge1 = edge1->next;
- }
-
- if ( firsttime )
- {
- nodeBuffer[t + step] = 1;
- }
-
- last = true;
- }
- else
- {
- last = false;
- }
-
- if ( firsttime )
- {
- nodeBuffer[t] = 1;
- }
- }
- }
+ kmer_t2 *node1, *node2;
+ struct edgeID *edge1, *edge2;
+ unsigned int from_ed, to_ed, temp_from, temp_to;
+ boolean found;
+ ARC *temp = NULL;
+ boolean last = false;
+ int t;
+
+ for ( t = indexArray[i]; t < indexArray[i + 1] - 1; ++t )
+ {
+ //get first match
+ if ( !last )
+ {
+ found = search_kmerset2 ( kset, kmerBuffer[t], &node1 );
+ }
+ else
+ {
+ found = true;
+ node1 = node2;
+ }
+
+ //get next match
+ if ( found )
+ {
+ found = search_kmerset2 ( kset, kmerBuffer[t + step], &node2 );
+
+ if ( found )
+ {
+ edge1 = node1->edgeId;
+
+ while ( edge1 )
+ {
+ edge2 = node2->edgeId;
+
+ while ( edge2 )
+ {
+ temp_from = edge1->edge;
+ temp_to = edge2->edge;
+
+ if ( smallerBuffer[t] && smallerBuffer[t + step] )
+ {
+ if ( ( edge1->flag == 2 || edge1->flag == 4 || edge1->flag == 8 )
+ && ( edge2->flag == 3 || edge2->flag == 4 || edge2->flag == 6 ) )
+ {
+ {
+ if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
+ {
+ pthread_mutex_lock ( &locks[temp_from] );
+ edge_array[temp_from].multi = 1;
+ pthread_mutex_unlock ( &locks[temp_from] );
+ pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
+ edge_array[getTwinEdge ( temp_from )].multi = 1;
+ pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
+ }
+ else
+ {
+ if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
+ {
+ if ( t == indexArray[i] )
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ }
+ }
+ }
+ else if ( smallerBuffer[t] && !smallerBuffer[t + step] )
+ {
+ if ( ( edge1->flag == 2 || edge1->flag == 4 || edge1->flag == 8 )
+ && ( edge2->flag == 1 || edge2->flag == 5 || edge2->flag == 7 ) )
+ {
+ {
+ if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
+ {
+ pthread_mutex_lock ( &locks[temp_from] );
+ edge_array[temp_from].multi = 1;
+ pthread_mutex_unlock ( &locks[temp_from] );
+ pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
+ edge_array[getTwinEdge ( temp_from )].multi = 1;
+ pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
+ }
+ else
+ {
+ if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
+ {
+ if ( t == indexArray[i] )
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ }
+ }
+ }
+ else if ( !smallerBuffer[t] && smallerBuffer[t + step] )
+ {
+ if ( ( edge1->flag == 0 || edge1->flag == 5 || edge1->flag == 9 )
+ && ( edge2->flag == 3 || edge2->flag == 4 || edge2->flag == 6 ) )
+ {
+ {
+ if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
+ {
+ pthread_mutex_lock ( &locks[temp_from] );
+ edge_array[temp_from].multi = 1;
+ pthread_mutex_unlock ( &locks[temp_from] );
+ pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
+ edge_array[getTwinEdge ( temp_from )].multi = 1;
+ pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
+ }
+ else
+ {
+ if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
+ {
+ if ( t == indexArray[i] )
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ }
+ }
+ }
+ else if ( !smallerBuffer[t] && !smallerBuffer[t + step] )
+ {
+ if ( ( edge1->flag == 0 || edge1->flag == 5 || edge1->flag == 9 )
+ && ( edge2->flag == 1 || edge2->flag == 5 || edge2->flag == 7 ) )
+ {
+ {
+ if ( temp_from == temp_to || temp_from == getTwinEdge ( temp_to ) )
+ {
+ pthread_mutex_lock ( &locks[temp_from] );
+ edge_array[temp_from].multi = 1;
+ pthread_mutex_unlock ( &locks[temp_from] );
+ pthread_mutex_lock ( &locks[getTwinEdge ( temp_from )] );
+ edge_array[getTwinEdge ( temp_from )].multi = 1;
+ pthread_mutex_unlock ( &locks[getTwinEdge ( temp_from )] );
+ }
+ else
+ {
+ if ( t == indexArray[i] || t == indexArray[i + 1] - 2 )
+ {
+ if ( t == indexArray[i] )
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ else
+ {
+ put_preArc_threaded ( &arc_arr, locks, temp_from, temp_to, 1 );
+ }
+ }
+ }
+ }
+ }
+
+ edge2 = edge2->next;
+ }
+
+ edge1 = edge1->next;
+ }
+
+ if ( firsttime )
+ {
+ nodeBuffer[t + step] = 1;
+ }
+
+ last = true;
+ }
+ else
+ {
+ last = false;
+ }
+
+ if ( firsttime )
+ {
+ nodeBuffer[t] = 1;
+ }
+ }
+ }
}
/*************************************************
@@ -487,77 +520,77 @@ Return:
*************************************************/
static void chopKmer4read ( int t, int threadID )
{
- char * src_seq = seqBuffer[t];
- char * bal_seq = rcSeq[threadID];
- int len_seq = lenBuffer[t];
- int j, bal_j;
- ubyte8 hash_ban, bal_hash_ban;
- Kmer word, bal_word;
- int index;
-
- if ( len_seq < overlaplen + 1 )
- {
- return;
- }
+ char *src_seq = seqBuffer[t];
+ char *bal_seq = rcSeq[threadID];
+ int len_seq = lenBuffer[t];
+ int j, bal_j;
+ ubyte8 hash_ban, bal_hash_ban;
+ Kmer word, bal_word;
+ int index;
+
+ if ( len_seq < overlaplen + 1 )
+ {
+ return;
+ }
#ifdef MER127
- word = kmerZero;
+ word = kmerZero;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low2 |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low2 |= src_seq[index];
+ }
#else
- word = kmerZero;
+ word = kmerZero;
- for ( index = 0; index < overlaplen; index++ )
- {
- word = KmerLeftBitMoveBy2 ( word );
- word.low |= src_seq[index];
- }
+ for ( index = 0; index < overlaplen; index++ )
+ {
+ word = KmerLeftBitMoveBy2 ( word );
+ word.low |= src_seq[index];
+ }
#endif
- reverseComplementSeq ( src_seq, len_seq, bal_seq );
- // complementary node
- bal_word = reverseComplement ( word, overlaplen );
- bal_j = len_seq - 0 - overlaplen;
- index = indexArray[t];
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- index++;
- }
- else
- {
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- index++;
- }
-
- for ( j = 1; j <= len_seq - overlaplen; j ++ )
- {
- word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
- bal_j = len_seq - j - overlaplen; // j;
- bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
-
- if ( KmerSmaller ( word, bal_word ) )
- {
- kmerBuffer[index] = word;
- smallerBuffer[index] = 1;
- index++;
- }
- else
- {
- // complementary node
- kmerBuffer[index] = bal_word;
- smallerBuffer[index] = 0;
- index++;
- }
- }
+ reverseComplementSeq ( src_seq, len_seq, bal_seq );
+ // complementary node
+ bal_word = reverseComplement ( word, overlaplen );
+ bal_j = len_seq - 0 - overlaplen;
+ index = indexArray[t];
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ index++;
+ }
+ else
+ {
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ index++;
+ }
+
+ for ( j = 1; j <= len_seq - overlaplen; j ++ )
+ {
+ word = nextKmer ( word, src_seq[j - 1 + overlaplen] );
+ bal_j = len_seq - j - overlaplen; // j;
+ bal_word = prevKmer ( bal_word, bal_seq[bal_j] );
+
+ if ( KmerSmaller ( word, bal_word ) )
+ {
+ kmerBuffer[index] = word;
+ smallerBuffer[index] = 1;
+ index++;
+ }
+ else
+ {
+ // complementary node
+ kmerBuffer[index] = bal_word;
+ smallerBuffer[index] = 0;
+ index++;
+ }
+ }
}
/*************************************************
@@ -573,20 +606,22 @@ Output:
Return:
Arc between two edges.
*************************************************/
-ARC * getArcBetween2 ( unsigned int from_ed, unsigned int to_ed )
+ARC *getArcBetween2 ( unsigned int from_ed, unsigned int to_ed )
{
- ARC * parc;
- parc = edge_array[from_ed].arcs;
+ ARC *parc;
+ parc = edge_array[from_ed].arcs;
- while ( parc )
- {
- if ( parc->to_ed == to_ed )
- { return parc; }
+ while ( parc )
+ {
+ if ( parc->to_ed == to_ed )
+ {
+ return parc;
+ }
- parc = parc->next;
- }
+ parc = parc->next;
+ }
- return parc;
+ return parc;
}
/*************************************************
@@ -605,55 +640,61 @@ Return:
*************************************************/
static void add1Arc2 ( unsigned int from_ed, unsigned int to_ed, unsigned int weight )
{
- unsigned int bal_fe = getTwinEdge ( from_ed );
- unsigned int bal_te = getTwinEdge ( to_ed );
-
- if ( from_ed > num_ed || to_ed > num_ed || bal_fe > num_ed || bal_te > num_ed )
- { return; }
-
- ARC * parc, *bal_parc;
- //both arcs already exist
- parc = getArcBetween ( from_ed, to_ed );
-
- if ( parc )
- {
- bal_parc = parc->bal_arc;
- parc->multiplicity += weight;
- bal_parc->multiplicity += weight;
- return;
- }
-
- //create new arcs
- parc = allocateArc ( to_ed );
- parc->multiplicity = weight;
- parc->prev = NULL;
-
- if ( edge_array[from_ed].arcs )
- { edge_array[from_ed].arcs->prev = parc; }
-
- parc->next = edge_array[from_ed].arcs;
- edge_array[from_ed].arcs = parc;
-
- // A->A'
- if ( bal_te == from_ed )
- {
- parc->bal_arc = parc;
- parc->multiplicity += weight;
- return;
- }
-
- bal_parc = allocateArc ( bal_fe );
- bal_parc->multiplicity = weight;
- bal_parc->prev = NULL;
-
- if ( edge_array[bal_te].arcs )
- { edge_array[bal_te].arcs->prev = bal_parc; }
-
- bal_parc->next = edge_array[bal_te].arcs;
- edge_array[bal_te].arcs = bal_parc;
- //link them to each other
- parc->bal_arc = bal_parc;
- bal_parc->bal_arc = parc;
+ unsigned int bal_fe = getTwinEdge ( from_ed );
+ unsigned int bal_te = getTwinEdge ( to_ed );
+
+ if ( from_ed > num_ed || to_ed > num_ed || bal_fe > num_ed || bal_te > num_ed )
+ {
+ return;
+ }
+
+ ARC *parc, *bal_parc;
+ //both arcs already exist
+ parc = getArcBetween ( from_ed, to_ed );
+
+ if ( parc )
+ {
+ bal_parc = parc->bal_arc;
+ parc->multiplicity += weight;
+ bal_parc->multiplicity += weight;
+ return;
+ }
+
+ //create new arcs
+ parc = allocateArc ( to_ed );
+ parc->multiplicity = weight;
+ parc->prev = NULL;
+
+ if ( edge_array[from_ed].arcs )
+ {
+ edge_array[from_ed].arcs->prev = parc;
+ }
+
+ parc->next = edge_array[from_ed].arcs;
+ edge_array[from_ed].arcs = parc;
+
+ // A->A'
+ if ( bal_te == from_ed )
+ {
+ parc->bal_arc = parc;
+ parc->multiplicity += weight;
+ return;
+ }
+
+ bal_parc = allocateArc ( bal_fe );
+ bal_parc->multiplicity = weight;
+ bal_parc->prev = NULL;
+
+ if ( edge_array[bal_te].arcs )
+ {
+ edge_array[bal_te].arcs->prev = bal_parc;
+ }
+
+ bal_parc->next = edge_array[bal_te].arcs;
+ edge_array[bal_te].arcs = bal_parc;
+ //link them to each other
+ parc->bal_arc = bal_parc;
+ bal_parc->bal_arc = parc;
}
/*************************************************
@@ -670,59 +711,59 @@ Return:
*************************************************/
static void parse1readcheck ( int t )
{
- unsigned int j;
- unsigned int start, finish;
- boolean found;
- start = indexArray[t];
- finish = indexArray[t + 1];
- boolean readfound = 0;
- nowstep = 1;
- int curr_fileNo;
- FILEREADSET * head, * next, *curr;
-
- for ( j = start; j < finish - nowstep; ++j )
- {
- found = nodeBuffer[j];
-
- if ( found )
- {
- found = nodeBuffer[j + nowstep];
-
- if ( found )
- {
- readfound = 1;
- break;
- }
- }
- }
-
- if ( readfound )
- {
- ++foundreadcount;
-
- if ( !writeFileNo )
- {
- int index, num_actg;
-
- for ( index = 0; index < lenBuffer[t]; index++ )
- {
- fprintf ( writeSeqFile, "%c", int2base ( seqBuffer[t][index] ) );
- }
-
- fprintf ( writeSeqFile, "\n" );
- }
- }
+ unsigned int j;
+ unsigned int start, finish;
+ boolean found;
+ start = indexArray[t];
+ finish = indexArray[t + 1];
+ boolean readfound = 0;
+ nowstep = 1;
+ int curr_fileNo;
+ FILEREADSET *head, * next, *curr;
+
+ for ( j = start; j < finish - nowstep; ++j )
+ {
+ found = nodeBuffer[j];
+
+ if ( found )
+ {
+ found = nodeBuffer[j + nowstep];
+
+ if ( found )
+ {
+ readfound = 1;
+ break;
+ }
+ }
+ }
+
+ if ( readfound )
+ {
+ ++foundreadcount;
+
+ if ( !writeFileNo )
+ {
+ int index, num_actg;
+
+ for ( index = 0; index < lenBuffer[t]; index++ )
+ {
+ fprintf ( writeSeqFile, "%c", int2base ( seqBuffer[t][index] ) );
+ }
+
+ fprintf ( writeSeqFile, "\n" );
+ }
+ }
}
//Free.
void free_new()
{
- free_libs();
- free ( file_Name );
- free ( file_type );
- file_type = NULL;
- free ( file_maxReadLen );
- file_maxReadLen = NULL;
+ free_libs();
+ free ( file_Name );
+ free ( file_type );
+ file_type = NULL;
+ free ( file_maxReadLen );
+ file_maxReadLen = NULL;
}
/*************************************************
@@ -740,398 +781,416 @@ Output:
Return:
None.
*************************************************/
-void Read2edge ( char * libfile, char * graph, int maxk )
+void Read2edge ( char *libfile, char *graph, int maxk )
{
- long long i;
- char * next_name;
- int maxReadNum, fileNo;
- boolean flag, pairs = 0;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- maxReadLen = 0;
- maxNameLen = 256;
- //scan lib info
- scan_libInfo ( libfile );
-
- if ( !maxReadLen )
- { maxReadLen = 100; }
-
- if ( maxk > maxReadLen )
- {
- fprintf ( stderr, "-- Max kmer %d larger than max read length %d, please define a smaller value. --\n", maxk, maxReadLen );
- abort();
- }
-
- maxReadLen4all = maxReadLen;
- fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n",
- libfile, maxReadLen, maxNameLen );
- int m, n, index;
- file_num = 0;
-
- for ( m = 0; m < num_libs; m++ )
- {
- if ( lib_array[m].asm_flag == 1 || lib_array[m].asm_flag == 3 )
- file_num += lib_array[m].num_a1_file
- + lib_array[m].num_a2_file
- + lib_array[m].num_p_file
- + lib_array[m].num_q1_file
- + lib_array[m].num_q2_file
- + lib_array[m].num_s_a_file
- + lib_array[m].num_s_q_file;
- }
-
- file_Name = ( char ** ) ckalloc ( file_num * sizeof ( char * ) );
- file_type = ( int * ) ckalloc ( file_num * sizeof ( int ) );
- file_maxReadLen = ( int * ) ckalloc ( file_num * sizeof ( int ) );
- index = 0;
- //2013-5-14
- int maxReadLenLocal = 0;
-
- for ( m = 0; m < num_libs; m++ )
- {
- if ( lib_array[m].asm_flag != 1 && lib_array[m].asm_flag != 3 )
- { continue; }
-
- //2013-5-14
- if ( lib_array[m].rd_len_cutoff > 0 )
- { maxReadLenLocal = lib_array[m].rd_len_cutoff < maxReadLen4all ? lib_array[m].rd_len_cutoff : maxReadLen4all; }
- else
- { maxReadLenLocal = maxReadLen4all; }
-
- //fa1 fa2
- for ( n = 0; n < lib_array[m].num_a1_file; n++ )
- {
- if ( strlen ( lib_array[m].a1_fname[n] ) > 3 && strcmp ( lib_array[m].a1_fname[n] + strlen ( lib_array[m].a1_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a1_fname[n];
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a2_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a1_fname[n];
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a2_fname[n];
- }
- }
-
- //fq1 fq2
- for ( n = 0; n < lib_array[m].num_q1_file; n++ )
- {
- if ( strlen ( lib_array[m].q1_fname[n] ) > 3 && strcmp ( lib_array[m].q1_fname[n] + strlen ( lib_array[m].q1_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q1_fname[n];
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q2_fname[n];
- }
- else
- {
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q1_fname[n];
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q2_fname[n];
- }
- }
-
- //fp
- for ( n = 0; n < lib_array[m].num_p_file; n++ )
- {
- if ( strlen ( lib_array[m].p_fname[n] ) > 3 && strcmp ( lib_array[m].p_fname[n] + strlen ( lib_array[m].p_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].p_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].p_fname[n];
- }
- }
-
- //fa
- for ( n = 0; n < lib_array[m].num_s_a_file; n++ )
- {
- if ( strlen ( lib_array[m].s_a_fname[n] ) > 3 && strcmp ( lib_array[m].s_a_fname[n] + strlen ( lib_array[m].s_a_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_a_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_a_fname[n];
- }
- }
-
- //fq
- for ( n = 0; n < lib_array[m].num_s_q_file; n++ )
- {
- if ( strlen ( lib_array[m].s_q_fname[n] ) > 3 && strcmp ( lib_array[m].s_q_fname[n] + strlen ( lib_array[m].s_q_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_q_fname[n];
- }
- else
- {
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_q_fname[n];
- }
- }
- }
-
- //init
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- pthread_mutex_init ( &mutex_arc, NULL );
- nodeBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
- init_preArc_array ( &arc_arr, num_ed + 2 );
- locks = ( pthread_mutex_t * ) calloc ( arc_arr.array_sz, sizeof ( pthread_mutex_t ) );
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- pthread_mutex_init ( &locks[ii], NULL );
- }
-
- for ( i = 0; i < maxReadNum; i++ )
- { seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); }
-
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
- thrdSignal[0] = 0;
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- }
-
- if ( 1 )
- {
- rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- kmer_c = n_solexa = read_c = i = readNumBack = gradsCounter = 0;
- fileNo = -1;
- int t0, t1, t2, t3, t4, t5, t6;
- t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
- time_t read_start, read_end, time_bef, time_aft;
- time ( &read_start );
- int t;
- char writeSeqName[256];
- writeFileNo = 0;
- sprintf ( writeSeqName, "%s.read", graph );
- writeSeqFile = ckopen ( writeSeqName, "w" );
- int type;
- FILE * file = NULL;
- long pos_seq = 0;
-
- //parse all reads
- while ( ( flag = read1seqInLibpos ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), //file,
- &fileNo, file_num, file_Name, file_type, file_maxReadLen, &pos_seq ) ) != 0 )
- {
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- { continue; }
-
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- //chop kmer for reads
- sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- //add arc
- sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
-
- //check whether reads is useful
- for ( t = 0; t < read_c; ++t )
- {
- parse1readcheck ( t );
- }
-
- memset ( nodeBuffer, '\0', buffer_size * sizeof ( boolean ) );
- time ( &time_aft );
- t3 += time_aft - time_bef;
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
- }
-
- //take care of last round
- if ( read_c )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- //chop kmer for reads
- sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- //add arc
- sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- struct preArc * parc;
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- parc = ( arc_arr.store_pos ) [ii];
-
- if ( parc )
- {
- while ( parc )
- {
- add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
- parc = parc->next;
- }
- }
- }
-
- //check whether reads is useful
- for ( t = 0; t < read_c; ++t )
- {
- parse1readcheck ( t );
- }
-
- time ( &time_aft );
- t3 += time_aft - time_bef;
- }
- else
- {
- struct preArc * parc;
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- parc = ( arc_arr.store_pos ) [ii];
-
- if ( parc )
- {
- while ( parc )
- {
- add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
- parc = parc->next;
- }
- }
- }
- }
-
- fprintf ( stderr, "%lld read(s) processed.\n", i );
- // fprintf(stderr, "Time spent on reading file: %ds,chop reads: %ds, search kmer: %ds, parse reads: %ds.\n",t0,t1,t2, t3);
- fprintf ( stderr, "Time spent on:\n" );
- fprintf ( stderr, " importing reads: %ds,\n", t0 );
- fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
- fprintf ( stderr, " searching kmers in hash: %ds,\n", t2 );
- fprintf ( stderr, " parsing reads: %ds.\n", t3 );
-
- if ( foundreadcount )
- { fprintf ( stderr, "%lld reads available.\n", foundreadcount ); }
-
- foundreadcount = 0;
- //exit
- sendWorkSignal ( 3, thrdSignal );
- thread_wait ( threads );
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- free ( ( void * ) rcSeq[i + 1] );
- }
- }
-
- if ( 1 )
- {
- free ( ( void * ) rcSeq[0] );
- }
-
- free ( ( void * ) rcSeq );
- rcSeq = NULL;
-
- for ( i = 0; i < maxReadNum; i++ )
- { free ( ( void * ) seqBuffer[i] ); }
-
- free ( ( void * ) seqBuffer );
- seqBuffer = NULL;
- free ( ( void * ) lenBuffer );
- lenBuffer = NULL;
- free ( ( void * ) indexArray );
- indexArray = NULL;
- free ( ( void * ) kmerBuffer );
- kmerBuffer = NULL;
- free ( ( void * ) smallerBuffer );
- smallerBuffer = NULL;
- free ( ( void * ) nodeBuffer );
- nodeBuffer = NULL;
- free ( ( void * ) locks );
- struct preArc * temp, *temp_next;
-
- for ( i = 0; i < arc_arr.array_sz; ++i )
- {
- temp = ( arc_arr.store_pos ) [i];
-
- while ( temp )
- {
- temp_next = temp->next;
- free ( ( void * ) ( temp ) );
- temp = temp_next;
- }
- }
-
- free ( ( void * ) arc_arr.store_pos );
- free ( ( void * ) next_name );
- writeFileNo++;
- fclose ( writeSeqFile );
- free_new();
+ long long i;
+ char *next_name;
+ int maxReadNum, fileNo;
+ boolean flag, pairs = 0;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1];
+ PARAMETER paras[thrd_num];
+ maxReadLen = 0;
+ maxNameLen = 256;
+ //scan lib info
+ scan_libInfo ( libfile );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ if ( maxk > maxReadLen )
+ {
+ fprintf ( stderr, "-- Max kmer %d larger than max read length %d, please define a smaller value. --\n", maxk, maxReadLen );
+ abort();
+ }
+
+ maxReadLen4all = maxReadLen;
+ fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n",
+ libfile, maxReadLen, maxNameLen );
+ int m, n, index;
+ file_num = 0;
+
+ for ( m = 0; m < num_libs; m++ )
+ {
+ if ( lib_array[m].asm_flag == 1 || lib_array[m].asm_flag == 3 )
+ file_num += lib_array[m].num_a1_file
+ + lib_array[m].num_a2_file
+ + lib_array[m].num_p_file
+ + lib_array[m].num_q1_file
+ + lib_array[m].num_q2_file
+ + lib_array[m].num_s_a_file
+ + lib_array[m].num_s_q_file;
+ }
+
+ file_Name = ( char ** ) ckalloc ( file_num * sizeof ( char * ) );
+ file_type = ( int * ) ckalloc ( file_num * sizeof ( int ) );
+ file_maxReadLen = ( int * ) ckalloc ( file_num * sizeof ( int ) );
+ index = 0;
+ //2013-5-14
+ int maxReadLenLocal = 0;
+
+ for ( m = 0; m < num_libs; m++ )
+ {
+ if ( lib_array[m].asm_flag != 1 && lib_array[m].asm_flag != 3 )
+ {
+ continue;
+ }
+
+ //2013-5-14
+ if ( lib_array[m].rd_len_cutoff > 0 )
+ {
+ maxReadLenLocal = lib_array[m].rd_len_cutoff < maxReadLen4all ? lib_array[m].rd_len_cutoff : maxReadLen4all;
+ }
+ else
+ {
+ maxReadLenLocal = maxReadLen4all;
+ }
+
+ //fa1 fa2
+ for ( n = 0; n < lib_array[m].num_a1_file; n++ )
+ {
+ if ( strlen ( lib_array[m].a1_fname[n] ) > 3 && strcmp ( lib_array[m].a1_fname[n] + strlen ( lib_array[m].a1_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a1_fname[n];
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a2_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a1_fname[n];
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a2_fname[n];
+ }
+ }
+
+ //fq1 fq2
+ for ( n = 0; n < lib_array[m].num_q1_file; n++ )
+ {
+ if ( strlen ( lib_array[m].q1_fname[n] ) > 3 && strcmp ( lib_array[m].q1_fname[n] + strlen ( lib_array[m].q1_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 4;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q1_fname[n];
+ file_type[index] = 4;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q2_fname[n];
+ }
+ else
+ {
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q1_fname[n];
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q2_fname[n];
+ }
+ }
+
+ //fp
+ for ( n = 0; n < lib_array[m].num_p_file; n++ )
+ {
+ if ( strlen ( lib_array[m].p_fname[n] ) > 3 && strcmp ( lib_array[m].p_fname[n] + strlen ( lib_array[m].p_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].p_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].p_fname[n];
+ }
+ }
+
+ //fa
+ for ( n = 0; n < lib_array[m].num_s_a_file; n++ )
+ {
+ if ( strlen ( lib_array[m].s_a_fname[n] ) > 3 && strcmp ( lib_array[m].s_a_fname[n] + strlen ( lib_array[m].s_a_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_a_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_a_fname[n];
+ }
+ }
+
+ //fq
+ for ( n = 0; n < lib_array[m].num_s_q_file; n++ )
+ {
+ if ( strlen ( lib_array[m].s_q_fname[n] ) > 3 && strcmp ( lib_array[m].s_q_fname[n] + strlen ( lib_array[m].s_q_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 4;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_q_fname[n];
+ }
+ else
+ {
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_q_fname[n];
+ }
+ }
+ }
+
+ //init
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ pthread_mutex_init ( &mutex_arc, NULL );
+ nodeBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+ init_preArc_array ( &arc_arr, num_ed + 2 );
+ locks = ( pthread_mutex_t * ) calloc ( arc_arr.array_sz, sizeof ( pthread_mutex_t ) );
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii )
+ {
+ pthread_mutex_init ( &locks[ii], NULL );
+ }
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+ thrdSignal[0] = 0;
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ if ( 1 )
+ {
+ rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ kmer_c = n_solexa = read_c = i = readNumBack = gradsCounter = 0;
+ fileNo = -1;
+ int t0, t1, t2, t3, t4, t5, t6;
+ t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
+ time_t read_start, read_end, time_bef, time_aft;
+ time ( &read_start );
+ int t;
+ char writeSeqName[256];
+ writeFileNo = 0;
+ sprintf ( writeSeqName, "%s.read", graph );
+ writeSeqFile = ckopen ( writeSeqName, "w" );
+ int type;
+ FILE *file = NULL;
+ long pos_seq = 0;
+
+ //parse all reads
+ while ( ( flag = read1seqInLibpos ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), //file,
+ &fileNo, file_num, file_Name, file_type, file_maxReadLen, &pos_seq ) ) != 0 )
+ {
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 )
+ {
+ continue;
+ }
+
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( read_c == maxReadNum )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ //chop kmer for reads
+ sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ //add arc
+ sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+
+ //check whether reads is useful
+ for ( t = 0; t < read_c; ++t )
+ {
+ parse1readcheck ( t );
+ }
+
+ memset ( nodeBuffer, '\0', buffer_size * sizeof ( boolean ) );
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+ }
+
+ //take care of last round
+ if ( read_c )
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ //chop kmer for reads
+ sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ //add arc
+ sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ struct preArc *parc;
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii )
+ {
+ parc = ( arc_arr.store_pos ) [ii];
+
+ if ( parc )
+ {
+ while ( parc )
+ {
+ add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+ }
+ }
+ }
+
+ //check whether reads is useful
+ for ( t = 0; t < read_c; ++t )
+ {
+ parse1readcheck ( t );
+ }
+
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ }
+ else
+ {
+ struct preArc *parc;
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii )
+ {
+ parc = ( arc_arr.store_pos ) [ii];
+
+ if ( parc )
+ {
+ while ( parc )
+ {
+ add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+ }
+ }
+ }
+ }
+
+ fprintf ( stderr, "%lld read(s) processed.\n", i );
+ // fprintf(stderr, "Time spent on reading file: %ds,chop reads: %ds, search kmer: %ds, parse reads: %ds.\n",t0,t1,t2, t3);
+ fprintf ( stderr, "Time spent on:\n" );
+ fprintf ( stderr, " importing reads: %ds,\n", t0 );
+ fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
+ fprintf ( stderr, " searching kmers in hash: %ds,\n", t2 );
+ fprintf ( stderr, " parsing reads: %ds.\n", t3 );
+
+ if ( foundreadcount )
+ {
+ fprintf ( stderr, "%lld reads available.\n", foundreadcount );
+ }
+
+ foundreadcount = 0;
+ //exit
+ sendWorkSignal ( 3, thrdSignal );
+ thread_wait ( threads );
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+ }
+
+ if ( 1 )
+ {
+ free ( ( void * ) rcSeq[0] );
+ }
+
+ free ( ( void * ) rcSeq );
+ rcSeq = NULL;
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ seqBuffer = NULL;
+ free ( ( void * ) lenBuffer );
+ lenBuffer = NULL;
+ free ( ( void * ) indexArray );
+ indexArray = NULL;
+ free ( ( void * ) kmerBuffer );
+ kmerBuffer = NULL;
+ free ( ( void * ) smallerBuffer );
+ smallerBuffer = NULL;
+ free ( ( void * ) nodeBuffer );
+ nodeBuffer = NULL;
+ free ( ( void * ) locks );
+ struct preArc *temp, *temp_next;
+
+ for ( i = 0; i < arc_arr.array_sz; ++i )
+ {
+ temp = ( arc_arr.store_pos ) [i];
+
+ while ( temp )
+ {
+ temp_next = temp->next;
+ free ( ( void * ) ( temp ) );
+ temp = temp_next;
+ }
+ }
+
+ free ( ( void * ) arc_arr.store_pos );
+ free ( ( void * ) next_name );
+ writeFileNo++;
+ fclose ( writeSeqFile );
+ free_new();
}
int temp_times = 1;
@@ -1150,48 +1209,52 @@ Output:
Return:
1 if success.
*************************************************/
-static boolean read1seqInNewFile ( char * src_seq, int * len_seq, long * pos_seq )
+static boolean read1seqInNewFile ( char *src_seq, int *len_seq, long *pos_seq )
{
- char c;
- char * str = seqLine;
- int strLen = 0, i;
-
- /*
- if(temp_times>0)
- {
- *pos_seq = ftell(readSeqFile);
- }
- */
- if ( fgets ( str, lLineLen, readSeqFile ) )
- {
- strLen = strlen ( str );
- *len_seq = strLen - 1;
-
- for ( i = 0; i < strLen - 1; i++ )
- {
- if ( str[i] >= 'a' && str[i] <= 'z' )
- {
- c = base2int ( str[i] - 'a' + 'A' );
- src_seq[i] = c;
- }
- else if ( str[i] >= 'A' && str[i] <= 'Z' )
- {
- c = base2int ( str[i] );
- src_seq[i] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( str[i] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[i] = c;
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- }
-
- if ( strLen == 0 )
- { return 0; }
- else
- { return 1; }
+ char c;
+ char *str = seqLine;
+ int strLen = 0, i;
+
+ /*
+ Reads one line from readSeqFile into seqLine (at most lLineLen - 1 characters),
+ stores the line length without its newline in *len_seq, and writes one
+ base2int() code per letter into src_seq; '.' is encoded like 'A'.
+ Any other character leaves src_seq[i] untouched. pos_seq is not used here.
+ */
+ if ( fgets ( str, lLineLen, readSeqFile ) )
+ {
+ strLen = strlen ( str );
+ *len_seq = ( strLen > 0 && str[strLen - 1] == '\n' ) ? strLen - 1 : strLen; // fgets keeps the newline only if it read one; do not drop a base otherwise
+
+ for ( i = 0; i < *len_seq; i++ )
+ {
+ if ( str[i] >= 'a' && str[i] <= 'z' )
+ {
+ c = base2int ( str[i] - 'a' + 'A' ); // fold to upper case before encoding
+ src_seq[i] = c;
+ }
+ else if ( str[i] >= 'A' && str[i] <= 'Z' )
+ {
+ c = base2int ( str[i] );
+ src_seq[i] = c;
+ // after pre-processing, all the symbols are expected to be a, g, c, t or n in lower or upper case.
+ }
+ else if ( str[i] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[i] = c;
+ } // '.' is stored with the same code as 'A'
+ }
+ }
+
+ if ( strLen == 0 ) // nothing was read (fgets failed or end of file)
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
@@ -1213,405 +1276,423 @@ Return:
None.
*************************************************/
//void Read2edge2(char *libfile, char *graph, int lastTime, int maxk, boolean keepReadFile)
-void Read2edge2 ( char * libfile, char * graph, int lastTime, int maxk )
+void Read2edge2 ( char *libfile, char *graph, int lastTime, int maxk )
{
- long long i;
- char * next_name;
- int maxReadNum, fileNo;
- boolean flag, pairs = 0;
- pthread_t threads[thrd_num];
- unsigned char thrdSignal[thrd_num + 1];
- PARAMETER paras[thrd_num];
- maxReadLen = 0;
- maxNameLen = 256;
- //scan lib info
- scan_libInfo ( libfile );
-
- if ( !maxReadLen )
- { maxReadLen = 100; }
-
- if ( maxk > maxReadLen )
- {
- fprintf ( stderr, "-- Max kmer %d larger than max read length %d, please define a smaller value. --\n", maxk, maxReadLen );
- abort();
- }
-
- maxReadLen4all = maxReadLen;
- fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n",
- libfile, maxReadLen, maxNameLen );
- int m, n, index;
- file_num = 0;
-
- for ( m = 0; m < num_libs; m++ )
- {
- if ( lib_array[m].asm_flag == 1 || lib_array[m].asm_flag == 3 )
- file_num += lib_array[m].num_a1_file
- + lib_array[m].num_a2_file
- + lib_array[m].num_p_file
- + lib_array[m].num_q1_file
- + lib_array[m].num_q2_file
- + lib_array[m].num_s_a_file
- + lib_array[m].num_s_q_file;
- }
-
- file_Name = ( char ** ) ckalloc ( file_num * sizeof ( char * ) );
- file_type = ( int * ) ckalloc ( file_num * sizeof ( int ) );
- file_maxReadLen = ( int * ) ckalloc ( file_num * sizeof ( int ) );
- index = 0;
- //2013-5-14
- int maxReadLenLocal = 0;
-
- for ( m = 0; m < num_libs; m++ )
- {
- if ( lib_array[m].asm_flag != 1 && lib_array[m].asm_flag != 3 )
- { continue; }
-
- if ( lib_array[m].rd_len_cutoff > 0 )
- { maxReadLenLocal = lib_array[m].rd_len_cutoff < maxReadLen4all ? lib_array[m].rd_len_cutoff : maxReadLen4all; }
- else
- { maxReadLenLocal = maxReadLen4all; }
-
- //fa1 fa2
- for ( n = 0; n < lib_array[m].num_a1_file; n++ )
- {
- if ( strlen ( lib_array[m].a1_fname[n] ) > 3 && strcmp ( lib_array[m].a1_fname[n] + strlen ( lib_array[m].a1_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a1_fname[n];
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a2_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a1_fname[n];
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].a2_fname[n];
- }
- }
-
- //fq1 fq2
- for ( n = 0; n < lib_array[m].num_q1_file; n++ )
- {
- if ( strlen ( lib_array[m].q1_fname[n] ) > 3 && strcmp ( lib_array[m].q1_fname[n] + strlen ( lib_array[m].q1_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q1_fname[n];
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q2_fname[n];
- }
- else
- {
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q1_fname[n];
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].q2_fname[n];
- }
- }
-
- //fp
- for ( n = 0; n < lib_array[m].num_p_file; n++ )
- {
- if ( strlen ( lib_array[m].p_fname[n] ) > 3 && strcmp ( lib_array[m].p_fname[n] + strlen ( lib_array[m].p_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].p_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].p_fname[n];
- }
- }
-
- //fa
- for ( n = 0; n < lib_array[m].num_s_a_file; n++ )
- {
- if ( strlen ( lib_array[m].s_a_fname[n] ) > 3 && strcmp ( lib_array[m].s_a_fname[n] + strlen ( lib_array[m].s_a_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 3;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_a_fname[n];
- }
- else
- {
- file_type[index] = 1;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_a_fname[n];
- }
- }
-
- //fq
- for ( n = 0; n < lib_array[m].num_s_q_file; n++ )
- {
- if ( strlen ( lib_array[m].s_q_fname[n] ) > 3 && strcmp ( lib_array[m].s_q_fname[n] + strlen ( lib_array[m].s_q_fname[n] ) - 3, ".gz" ) == 0 )
- {
- file_type[index] = 4;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_q_fname[n];
- }
- else
- {
- file_type[index] = 2;
- file_maxReadLen[index] = maxReadLenLocal;
- file_Name[index++] = lib_array[m].s_q_fname[n];
- }
- }
- }
-
- //init
- next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
- kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
- smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
- init_preArc_array ( &arc_arr, num_ed + 2 );
- locks = ( pthread_mutex_t * ) calloc ( arc_arr.array_sz, sizeof ( pthread_mutex_t ) );
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- pthread_mutex_init ( &locks[ii], NULL );
- }
-
- pthread_mutex_init ( &mutex_arc, NULL );
- firsttime = 0;
- maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 );
- seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
- lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
- indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
-
- // offset = (long*)ckalloc((maxReadNum+1)*sizeof(long));
-
- for ( i = 0; i < maxReadNum; i++ )
- { seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) ); }
-
- rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
- thrdSignal[0] = 0;
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- thrdSignal[i + 1] = 0;
- paras[i].threadID = i;
- paras[i].mainSignal = &thrdSignal[0];
- paras[i].selfSignal = &thrdSignal[i + 1];
- }
-
- creatThrds ( threads, paras );
- }
-
- if ( 1 )
- {
- rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
- }
-
- kmer_c = n_solexa = read_c = i = readNumBack = gradsCounter = 0;
- fileNo = -1;
- int t0, t1, t2, t3, t4, t5, t6;
- t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
- time_t read_start, read_end, time_bef, time_aft;
- time ( &read_start );
- // fprintf(stderr, "Start to read.\n");
- int t;
- char readSeqName[256];
- sprintf ( readSeqName, "%s.read", graph );
- readSeqFile = ckopen ( readSeqName, "r" );
- FILE * file = NULL;
- long pos_seq = 0;
- char tmp[lLineLen];
-
- if ( maxReadLen > lLineLen )
- {
- lLineLen = maxReadLen + 1;
- seqLine = ( char * ) ckalloc ( lLineLen * sizeof ( char ) );
- }
- else
- {
- seqLine = tmp;
- }
-
- //parse all reads
- while ( ( flag = read1seqInNewFile ( seqBuffer[read_c], & ( lenBuffer[read_c] ), &pos_seq ) ) )
- {
- if ( ( ++i ) % 100000000 == 0 )
- { fprintf ( stderr, "--- %lldth reads.\n", i ); }
-
- if ( lenBuffer[read_c] < overlaplen + 1 )
- { continue; }
-
- // offset[read_c]=pos_seq;
- indexArray[read_c] = kmer_c;
- kmer_c += lenBuffer[read_c] - overlaplen + 1;
- read_c++;
-
- if ( read_c == maxReadNum )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- //chop kmer
- sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- //add arc
- sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- time ( &time_aft );
- t3 += time_aft - time_bef;
- kmer_c = 0;
- read_c = 0;
- time ( &read_start );
- }
- }
-
- if ( read_c )
- {
- indexArray[read_c] = kmer_c;
- time ( &read_end );
- t0 += read_end - read_start;
- time ( &time_bef );
- //chop kmer
- sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
- time ( &time_aft );
- t1 += time_aft - time_bef;
- time ( &time_bef );
- //add arc
- sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
- time ( &time_aft );
- t2 += time_aft - time_bef;
- time ( &time_bef );
- struct preArc * parc;
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- parc = ( arc_arr.store_pos ) [ii];
-
- if ( parc )
- {
- while ( parc )
- {
- add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
- parc = parc->next;
- }
- }
- }
-
- time ( &time_aft );
- t3 += time_aft - time_bef;
- }
- else
- {
- struct preArc * parc;
- unsigned int ii;
-
- for ( ii = 0; ii < arc_arr.array_sz; ++ii )
- {
- parc = ( arc_arr.store_pos ) [ii];
-
- if ( parc )
- {
- while ( parc )
- {
- add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
- parc = parc->next;
- }
- }
- }
- }
-
- fprintf ( stderr, "%lld read(s) processed.\n", i );
- // fprintf(stderr, "Time spent on reading file: %ds,chop reads: %ds, search kmer: %ds, parse reads: %ds.\n",t0,t1,t2, t3);
- fprintf ( stderr, "Time spent on:\n" );
- fprintf ( stderr, " importing reads: %ds,\n", t0 );
- fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
- fprintf ( stderr, " searching kmers in hash: %ds,\n", t2 );
- fprintf ( stderr, " parsing reads: %ds.\n", t3 );
-
- if ( foundreadcount )
- { fprintf ( stderr, "%lld reads available.\n", foundreadcount ); }
-
- foundreadcount = 0;
- //exit
- sendWorkSignal ( 3, thrdSignal );
- thread_wait ( threads );
-
- if ( 1 )
- {
- for ( i = 0; i < thrd_num; i++ )
- {
- free ( ( void * ) rcSeq[i + 1] );
- }
- }
-
- if ( 1 )
- {
- free ( ( void * ) rcSeq[0] );
- }
-
- free ( ( void * ) rcSeq );
- rcSeq = NULL;
-
- for ( i = 0; i < maxReadNum; i++ )
- { free ( ( void * ) seqBuffer[i] ); }
-
- free ( ( void * ) seqBuffer );
- seqBuffer = NULL;
- free ( ( void * ) lenBuffer );
- lenBuffer = NULL;
- free ( ( void * ) indexArray );
- indexArray = NULL;
- free ( ( void * ) kmerBuffer );
- kmerBuffer = NULL;
- free ( ( void * ) smallerBuffer );
- smallerBuffer = NULL;
- // free((void*)offset);
- // offset=NULL;
- free ( ( void * ) locks );
- struct preArc * temp, *temp_next;
-
- for ( i = 0; i < arc_arr.array_sz; ++i )
- {
- temp = ( arc_arr.store_pos ) [i];
-
- while ( temp )
- {
- temp_next = temp->next;
- free ( ( void * ) ( temp ) );
- temp = temp_next;
- }
- }
-
- free ( ( void * ) arc_arr.store_pos );
- free ( ( void * ) next_name );
- fclose ( readSeqFile );
-
- if ( !lastTime )
- {
- // if(!keepReadFile)
- remove ( readSeqName );
- }
-
- free_new();
-
- if ( maxReadLen > lLineLen )
- {
- free ( ( void * ) seqLine );
- }
+ long long i; // number of sequences taken from the .read file so far
+ char *next_name;
+ int maxReadNum, fileNo;
+ boolean flag, pairs = 0;
+ pthread_t threads[thrd_num];
+ unsigned char thrdSignal[thrd_num + 1]; // slot 0: main signal, slot i + 1: signal of thread i
+ PARAMETER paras[thrd_num];
+ maxReadLen = 0;
+ maxNameLen = 256;
+ //scan lib info; if maxReadLen is still 0 afterwards the fallback of 100 below applies
+ scan_libInfo ( libfile );
+
+ if ( !maxReadLen )
+ {
+ maxReadLen = 100;
+ }
+
+ if ( maxk > maxReadLen )
+ {
+ fprintf ( stderr, "-- Max kmer %d larger than max read length %d, please define a smaller value. --\n", maxk, maxReadLen );
+ abort();
+ }
+
+ maxReadLen4all = maxReadLen;
+ fprintf ( stderr, "In file: %s, max seq len %d, max name len %d.\n",
+ libfile, maxReadLen, maxNameLen );
+ int m, n, index;
+ file_num = 0;
+
+ for ( m = 0; m < num_libs; m++ ) // count the files of every library whose asm_flag is 1 or 3
+ {
+ if ( lib_array[m].asm_flag == 1 || lib_array[m].asm_flag == 3 )
+ file_num += lib_array[m].num_a1_file
+ + lib_array[m].num_a2_file
+ + lib_array[m].num_p_file
+ + lib_array[m].num_q1_file
+ + lib_array[m].num_q2_file
+ + lib_array[m].num_s_a_file
+ + lib_array[m].num_s_q_file;
+ }
+
+ file_Name = ( char ** ) ckalloc ( file_num * sizeof ( char * ) );
+ file_type = ( int * ) ckalloc ( file_num * sizeof ( int ) );
+ file_maxReadLen = ( int * ) ckalloc ( file_num * sizeof ( int ) );
+ index = 0;
+ // per-library length cap: rd_len_cutoff when positive, but never above maxReadLen4all
+ int maxReadLenLocal = 0;
+
+ for ( m = 0; m < num_libs; m++ )
+ {
+ if ( lib_array[m].asm_flag != 1 && lib_array[m].asm_flag != 3 )
+ {
+ continue;
+ }
+
+ if ( lib_array[m].rd_len_cutoff > 0 )
+ {
+ maxReadLenLocal = lib_array[m].rd_len_cutoff < maxReadLen4all ? lib_array[m].rd_len_cutoff : maxReadLen4all;
+ }
+ else
+ {
+ maxReadLenLocal = maxReadLen4all;
+ }
+
+ //fa1 fa2
+ for ( n = 0; n < lib_array[m].num_a1_file; n++ )
+ {
+ if ( strlen ( lib_array[m].a1_fname[n] ) > 3 && strcmp ( lib_array[m].a1_fname[n] + strlen ( lib_array[m].a1_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3; // name ends in ".gz"
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a1_fname[n];
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a2_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a1_fname[n];
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].a2_fname[n];
+ }
+ }
+
+ //fq1 fq2
+ for ( n = 0; n < lib_array[m].num_q1_file; n++ )
+ {
+ if ( strlen ( lib_array[m].q1_fname[n] ) > 3 && strcmp ( lib_array[m].q1_fname[n] + strlen ( lib_array[m].q1_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 4; // name ends in ".gz"
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q1_fname[n];
+ file_type[index] = 4;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q2_fname[n];
+ }
+ else
+ {
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q1_fname[n];
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].q2_fname[n];
+ }
+ }
+
+ //fp
+ for ( n = 0; n < lib_array[m].num_p_file; n++ )
+ {
+ if ( strlen ( lib_array[m].p_fname[n] ) > 3 && strcmp ( lib_array[m].p_fname[n] + strlen ( lib_array[m].p_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].p_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].p_fname[n];
+ }
+ }
+
+ //fa
+ for ( n = 0; n < lib_array[m].num_s_a_file; n++ )
+ {
+ if ( strlen ( lib_array[m].s_a_fname[n] ) > 3 && strcmp ( lib_array[m].s_a_fname[n] + strlen ( lib_array[m].s_a_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 3;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_a_fname[n];
+ }
+ else
+ {
+ file_type[index] = 1;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_a_fname[n];
+ }
+ }
+
+ //fq
+ for ( n = 0; n < lib_array[m].num_s_q_file; n++ )
+ {
+ if ( strlen ( lib_array[m].s_q_fname[n] ) > 3 && strcmp ( lib_array[m].s_q_fname[n] + strlen ( lib_array[m].s_q_fname[n] ) - 3, ".gz" ) == 0 )
+ {
+ file_type[index] = 4;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_q_fname[n];
+ }
+ else
+ {
+ file_type[index] = 2;
+ file_maxReadLen[index] = maxReadLenLocal;
+ file_Name[index++] = lib_array[m].s_q_fname[n];
+ }
+ }
+ }
+
+ //init
+ next_name = ( char * ) ckalloc ( ( maxNameLen + 1 ) * sizeof ( char ) );
+ kmerBuffer = ( Kmer * ) ckalloc ( buffer_size * sizeof ( Kmer ) );
+ smallerBuffer = ( boolean * ) ckalloc ( buffer_size * sizeof ( boolean ) );
+ init_preArc_array ( &arc_arr, num_ed + 2 );
+ locks = ( pthread_mutex_t * ) calloc ( arc_arr.array_sz, sizeof ( pthread_mutex_t ) ); // one mutex per arc_arr slot
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii )
+ {
+ pthread_mutex_init ( &locks[ii], NULL );
+ }
+
+ pthread_mutex_init ( &mutex_arc, NULL );
+ firsttime = 0;
+ maxReadNum = buffer_size / ( maxReadLen - overlaplen + 1 ); // number of reads handled per batch
+ seqBuffer = ( char ** ) ckalloc ( maxReadNum * sizeof ( char * ) );
+ lenBuffer = ( int * ) ckalloc ( maxReadNum * sizeof ( int ) );
+ indexArray = ( int * ) ckalloc ( ( maxReadNum + 1 ) * sizeof ( int ) );
+
+ // one maxReadLen-byte sequence buffer per batch slot
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ seqBuffer[i] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ rcSeq = ( char ** ) ckalloc ( ( thrd_num + 1 ) * sizeof ( char * ) );
+ thrdSignal[0] = 0;
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ rcSeq[i + 1] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ thrdSignal[i + 1] = 0;
+ paras[i].threadID = i;
+ paras[i].mainSignal = &thrdSignal[0];
+ paras[i].selfSignal = &thrdSignal[i + 1];
+ }
+
+ creatThrds ( threads, paras );
+ }
+
+ if ( 1 )
+ {
+ rcSeq[0] = ( char * ) ckalloc ( maxReadLen * sizeof ( char ) );
+ }
+
+ kmer_c = n_solexa = read_c = i = readNumBack = gradsCounter = 0;
+ fileNo = -1;
+ int t0, t1, t2, t3, t4, t5, t6;
+ t0 = t1 = t2 = t3 = t4 = t5 = t6 = 0;
+ time_t read_start, read_end, time_bef, time_aft;
+ time ( &read_start );
+ // the sequences are taken from the <graph>.read file opened below, one fgets() line per sequence
+ int t;
+ char readSeqName[256];
+ snprintf ( readSeqName, sizeof ( readSeqName ), "%s.read", graph ); // bounded: a long graph prefix must not overrun the 256-byte name buffer
+ readSeqFile = ckopen ( readSeqName, "r" );
+ FILE *file = NULL;
+ long pos_seq = 0;
+ char tmp[lLineLen]; // stack line buffer, used when lLineLen is already at least maxReadLen
+
+ if ( maxReadLen > lLineLen )
+ {
+ lLineLen = maxReadLen + 1;
+ seqLine = ( char * ) ckalloc ( lLineLen * sizeof ( char ) );
+ }
+ else
+ {
+ seqLine = tmp;
+ }
+
+ //parse all reads
+ while ( ( flag = read1seqInNewFile ( seqBuffer[read_c], & ( lenBuffer[read_c] ), &pos_seq ) ) )
+ {
+ if ( ( ++i ) % 100000000 == 0 )
+ {
+ fprintf ( stderr, "--- %lldth reads.\n", i );
+ }
+
+ if ( lenBuffer[read_c] < overlaplen + 1 ) // too short: the slot is reused by the next sequence
+ {
+ continue;
+ }
+
+ // indexArray[r] holds the running kmer_c total before read r was added
+ indexArray[read_c] = kmer_c;
+ kmer_c += lenBuffer[read_c] - overlaplen + 1;
+ read_c++;
+
+ if ( read_c == maxReadNum ) // batch is full: hand it to the worker threads
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ //chop kmer
+ sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ //add arc
+ sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ kmer_c = 0;
+ read_c = 0;
+ time ( &read_start );
+ }
+ }
+
+ if ( read_c ) // last, partially filled batch
+ {
+ indexArray[read_c] = kmer_c;
+ time ( &read_end );
+ t0 += read_end - read_start;
+ time ( &time_bef );
+ //chop kmer
+ sendWorkSignal ( 1, thrdSignal ); //chopKmer4read
+ time ( &time_aft );
+ t1 += time_aft - time_bef;
+ time ( &time_bef );
+ //add arc
+ sendWorkSignal ( 5, thrdSignal ); //searchKmer1read
+ time ( &time_aft );
+ t2 += time_aft - time_bef;
+ time ( &time_bef );
+ struct preArc *parc;
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii ) // pass every collected preArc of every slot to add1Arc2
+ {
+ parc = ( arc_arr.store_pos ) [ii];
+
+ if ( parc )
+ {
+ while ( parc )
+ {
+ add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+ }
+ }
+ }
+
+ time ( &time_aft );
+ t3 += time_aft - time_bef;
+ }
+ else
+ {
+ struct preArc *parc;
+ unsigned int ii;
+
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii ) // same hand-over when no partial batch is pending
+ {
+ parc = ( arc_arr.store_pos ) [ii];
+
+ if ( parc )
+ {
+ while ( parc )
+ {
+ add1Arc2 ( ii, parc->to_ed, parc->multiplicity );
+ parc = parc->next;
+ }
+ }
+ }
+ }
+
+ fprintf ( stderr, "%lld read(s) processed.\n", i );
+ // t0..t3 are whole-second totals accumulated from time() differences
+ fprintf ( stderr, "Time spent on:\n" );
+ fprintf ( stderr, " importing reads: %ds,\n", t0 );
+ fprintf ( stderr, " chopping reads to kmers: %ds,\n", t1 );
+ fprintf ( stderr, " searching kmers in hash: %ds,\n", t2 );
+ fprintf ( stderr, " parsing reads: %ds.\n", t3 );
+
+ if ( foundreadcount )
+ {
+ fprintf ( stderr, "%lld reads available.\n", foundreadcount );
+ }
+
+ foundreadcount = 0;
+ //exit
+ sendWorkSignal ( 3, thrdSignal );
+ thread_wait ( threads );
+
+ if ( 1 )
+ {
+ for ( i = 0; i < thrd_num; i++ )
+ {
+ free ( ( void * ) rcSeq[i + 1] );
+ }
+ }
+
+ if ( 1 )
+ {
+ free ( ( void * ) rcSeq[0] );
+ }
+
+ free ( ( void * ) rcSeq );
+ rcSeq = NULL;
+
+ for ( i = 0; i < maxReadNum; i++ )
+ {
+ free ( ( void * ) seqBuffer[i] );
+ }
+
+ free ( ( void * ) seqBuffer );
+ seqBuffer = NULL;
+ free ( ( void * ) lenBuffer );
+ lenBuffer = NULL;
+ free ( ( void * ) indexArray );
+ indexArray = NULL;
+ free ( ( void * ) kmerBuffer );
+ kmerBuffer = NULL;
+ free ( ( void * ) smallerBuffer );
+ smallerBuffer = NULL;
+ for ( ii = 0; ii < arc_arr.array_sz; ++ii ) // destroy each mutex before its storage is freed
+ { pthread_mutex_destroy ( &locks[ii] ); }
+ free ( ( void * ) locks );
+ struct preArc *temp, *temp_next;
+
+ for ( i = 0; i < arc_arr.array_sz; ++i ) // free every preArc list node
+ {
+ temp = ( arc_arr.store_pos ) [i];
+
+ while ( temp )
+ {
+ temp_next = temp->next;
+ free ( ( void * ) ( temp ) );
+ temp = temp_next;
+ }
+ }
+
+ free ( ( void * ) arc_arr.store_pos );
+ free ( ( void * ) next_name );
+ fclose ( readSeqFile );
+
+ if ( !lastTime )
+ {
+ // the .read file is deleted unless lastTime is set
+ remove ( readSeqName );
+ }
+
+ free_new();
+
+ if ( seqLine != tmp ) // heap line buffer was allocated above; testing maxReadLen > lLineLen here is never true once lLineLen has been raised, which leaked it
+ {
+ free ( ( void * ) seqLine );
+ }
}
diff --git a/standardPregraph/read2scaf.c b/standardPregraph/read2scaf.c
index 00e9827..d6a4100 100644
--- a/standardPregraph/read2scaf.c
+++ b/standardPregraph/read2scaf.c
@@ -1,7 +1,7 @@
/*
* read2scaf.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -31,297 +31,297 @@ static int allGaps;
// for multi threads
static int scafBufSize = 100;
-static STACK ** ctgStackBuffer;
+static STACK **ctgStackBuffer;
static int scafCounter;
static int scafInBuf;
static void convertIndex ()
{
- int * length_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
- unsigned int i;
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- length_array[i] = 0;
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- if ( index_array[i] > 0 )
- {
- length_array[index_array[i]] = i;
- }
- }
-
- for ( i = 1; i <= num_ctg; i++ )
- {
- index_array[i] = length_array[i];
- } //contig i with new index: index_array[i]
-
- free ( ( void * ) length_array );
+ int *length_array = ( int * ) ckalloc ( ( num_ctg + 1 ) * sizeof ( int ) );
+ unsigned int i;
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ length_array[i] = 0;
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ if ( index_array[i] > 0 )
+ {
+ length_array[index_array[i]] = i;
+ }
+ }
+
+ for ( i = 1; i <= num_ctg; i++ )
+ {
+ index_array[i] = length_array[i];
+ } //contig i with new index: index_array[i]
+
+ free ( ( void * ) length_array );
}
-static void reverseStack ( STACK * dStack, STACK * sStack )
+static void reverseStack ( STACK *dStack, STACK *sStack )
{
- CTGinSCAF * actg, *ctgPt;
- emptyStack ( dStack );
-
- while ( ( actg = ( CTGinSCAF * ) stackPop ( sStack ) ) != NULL )
- {
- ctgPt = ( CTGinSCAF * ) stackPush ( dStack );
- ctgPt->ctgID = actg->ctgID;
- ctgPt->start = actg->start;
- ctgPt->end = actg->end;
- }
-
- stackBackup ( dStack );
+ CTGinSCAF *actg, *ctgPt;
+ emptyStack ( dStack );
+
+ while ( ( actg = ( CTGinSCAF * ) stackPop ( sStack ) ) != NULL )
+ {
+ ctgPt = ( CTGinSCAF * ) stackPush ( dStack );
+ ctgPt->ctgID = actg->ctgID;
+ ctgPt->start = actg->start;
+ ctgPt->end = actg->end;
+ }
+
+ stackBackup ( dStack );
}
-static void initStackBuf ( STACK ** ctgStackBuffer, int scafBufSize )
+static void initStackBuf ( STACK **ctgStackBuffer, int scafBufSize )
{
- int i;
+ int i;
- for ( i = 0; i < scafBufSize; i++ )
- {
- ctgStackBuffer[i] = ( STACK * ) createStack ( 100, sizeof ( CTGinSCAF ) );
- }
+ for ( i = 0; i < scafBufSize; i++ )
+ {
+ ctgStackBuffer[i] = ( STACK * ) createStack ( 100, sizeof ( CTGinSCAF ) );
+ }
}
-static void freeStackBuf ( STACK ** ctgStackBuffer, int scafBufSize )
+static void freeStackBuf ( STACK **ctgStackBuffer, int scafBufSize )
{
- int i;
+ int i;
- for ( i = 0; i < scafBufSize; i++ )
- {
- freeStack ( ctgStackBuffer[i] );
- }
+ for ( i = 0; i < scafBufSize; i++ )
+ {
+ freeStack ( ctgStackBuffer[i] );
+ }
}
static void mapCtg2Scaf ( int scafInBuf )
{
- int i, scafID;
- CTGinSCAF * actg;
- STACK * ctgsStack;
- unsigned int ctg, bal_ctg;
-
- for ( i = 0; i < scafInBuf; i++ )
- {
- scafID = scafCounter + i + 1;
- ctgsStack = ctgStackBuffer[i];
-
- while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
- {
- ctg = actg->ctgID;
- bal_ctg = getTwinCtg ( ctg );
-
- if ( contig_array[ctg].from_vt != 0 )
- {
- contig_array[ctg].multi = 1;
- contig_array[bal_ctg].multi = 1;
- continue;
- }
-
- contig_array[ctg].from_vt = scafID;
- contig_array[ctg].to_vt = actg->start;
- contig_array[ctg].flag = 0; //ctg and scaf on the same strand
- contig_array[bal_ctg].from_vt = scafID;
- contig_array[bal_ctg].to_vt = actg->start;
- contig_array[bal_ctg].flag = 1;
- }
- }
+ int i, scafID;
+ CTGinSCAF *actg;
+ STACK *ctgsStack;
+ unsigned int ctg, bal_ctg;
+
+ for ( i = 0; i < scafInBuf; i++ )
+ {
+ scafID = scafCounter + i + 1;
+ ctgsStack = ctgStackBuffer[i];
+
+ while ( ( actg = stackPop ( ctgsStack ) ) != NULL )
+ {
+ ctg = actg->ctgID;
+ bal_ctg = getTwinCtg ( ctg );
+
+ if ( contig_array[ctg].from_vt != 0 )
+ {
+ contig_array[ctg].multi = 1;
+ contig_array[bal_ctg].multi = 1;
+ continue;
+ }
+
+ contig_array[ctg].from_vt = scafID;
+ contig_array[ctg].to_vt = actg->start;
+ contig_array[ctg].flag = 0; //ctg and scaf on the same strand
+ contig_array[bal_ctg].from_vt = scafID;
+ contig_array[bal_ctg].to_vt = actg->start;
+ contig_array[bal_ctg].flag = 1;
+ }
+ }
}
-static void locateContigOnscaff ( char * graphfile )
+static void locateContigOnscaff ( char *graphfile )
{
- FILE * fp;
- char line[1024];
- CTGinSCAF * actg;
- STACK * ctgStack, *aStack;
- int index = 0, counter, overallLen;
- int starter, prev_start, gapN, scafLen;
- unsigned int ctg, prev_ctg = 0;
-
- for ( ctg = 1; ctg <= num_ctg; ctg++ )
- {
- contig_array[ctg].from_vt = 0;
- contig_array[ctg].multi = 0;
- }
-
- ctgStack = ( STACK * ) createStack ( 1000, sizeof ( CTGinSCAF ) );
- sprintf ( line, "%s.scaf_gap", graphfile );
- fp = ckopen ( line, "r" );
- ctgStackBuffer = ( STACK ** ) ckalloc ( scafBufSize * sizeof ( STACK * ) );
- initStackBuf ( ctgStackBuffer, scafBufSize );
- Ncounter = scafCounter = scafInBuf = allGaps = 0;
-
- while ( fgets ( line, sizeof ( line ), fp ) != NULL )
- {
- if ( line[0] == '>' )
- {
- if ( index )
- {
- aStack = ctgStackBuffer[scafInBuf++];
- reverseStack ( aStack, ctgStack );
-
- if ( scafInBuf == scafBufSize )
- {
- mapCtg2Scaf ( scafInBuf );
- scafCounter += scafInBuf;
- scafInBuf = 0;
- }
-
- if ( index % 1000 == 0 )
- {
- fprintf ( stderr, "Processed %d scaffolds.\n", index );
- }
- }
-
- //read next scaff
- scafLen = prev_ctg = 0;
- emptyStack ( ctgStack );
- sscanf ( line + 9, "%d %d %d", &index, &counter, &overallLen );
- //fprintf(stderr,">%d\n",index);
- continue;
- }
-
- if ( line[0] == 'G' ) // gap appears
- {
- continue;
- }
-
- if ( line[0] >= '0' && line[0] <= '9' ) // a contig line
- {
- sscanf ( line, "%d %d", &ctg, &starter );
- actg = ( CTGinSCAF * ) stackPush ( ctgStack );
- actg->ctgID = ctg;
-
- if ( !prev_ctg )
- {
- actg->start = scafLen;
- actg->end = actg->start + overlaplen + contig_array[ctg].length - 1;
- }
- else
- {
- gapN = starter - prev_start - ( int ) contig_array[prev_ctg].length;
- gapN = gapN < 1 ? 1 : gapN;
- actg->start = scafLen + gapN;
- actg->end = actg->start + contig_array[ctg].length - 1;
- }
-
- //fprintf(stderr,"%d\t%d\n",actg->start,actg->end);
- scafLen = actg->end + 1;
- prev_ctg = ctg;
- prev_start = starter;
- }
- }
-
- if ( index )
- {
- aStack = ctgStackBuffer[scafInBuf++];
- reverseStack ( aStack, ctgStack );
- mapCtg2Scaf ( scafInBuf );
- }
-
- gapN = 0;
-
- for ( ctg = 1; ctg <= num_ctg; ctg++ )
- {
- if ( contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1 )
- {
- continue;
- }
-
- gapN++;
- }
-
- fprintf ( stderr, "\nDone with %d scaffolds, %d contigs in Scaffolld\n", index, gapN );
- /*
- if(readSeqInGap)
- freeDarray(readSeqInGap);
- */
- fclose ( fp );
- freeStack ( ctgStack );
- freeStackBuf ( ctgStackBuffer, scafBufSize );
- free ( ( void * ) ctgStackBuffer );
+ FILE *fp;
+ char line[1024];
+ CTGinSCAF *actg;
+ STACK *ctgStack, *aStack;
+ int index = 0, counter, overallLen;
+ int starter, prev_start, gapN, scafLen;
+ unsigned int ctg, prev_ctg = 0;
+
+ for ( ctg = 1; ctg <= num_ctg; ctg++ )
+ {
+ contig_array[ctg].from_vt = 0;
+ contig_array[ctg].multi = 0;
+ }
+
+ ctgStack = ( STACK * ) createStack ( 1000, sizeof ( CTGinSCAF ) );
+ sprintf ( line, "%s.scaf_gap", graphfile );
+ fp = ckopen ( line, "r" );
+ ctgStackBuffer = ( STACK ** ) ckalloc ( scafBufSize * sizeof ( STACK * ) );
+ initStackBuf ( ctgStackBuffer, scafBufSize );
+ Ncounter = scafCounter = scafInBuf = allGaps = 0;
+
+ while ( fgets ( line, sizeof ( line ), fp ) != NULL )
+ {
+ if ( line[0] == '>' )
+ {
+ if ( index )
+ {
+ aStack = ctgStackBuffer[scafInBuf++];
+ reverseStack ( aStack, ctgStack );
+
+ if ( scafInBuf == scafBufSize )
+ {
+ mapCtg2Scaf ( scafInBuf );
+ scafCounter += scafInBuf;
+ scafInBuf = 0;
+ }
+
+ if ( index % 1000 == 0 )
+ {
+ fprintf ( stderr, "Processed %d scaffolds.\n", index );
+ }
+ }
+
+ //read next scaff
+ scafLen = prev_ctg = 0;
+ emptyStack ( ctgStack );
+ sscanf ( line + 9, "%d %d %d", &index, &counter, &overallLen );
+ //fprintf(stderr,">%d\n",index);
+ continue;
+ }
+
+ if ( line[0] == 'G' ) // gap appears
+ {
+ continue;
+ }
+
+ if ( line[0] >= '0' && line[0] <= '9' ) // a contig line
+ {
+ sscanf ( line, "%d %d", &ctg, &starter );
+ actg = ( CTGinSCAF * ) stackPush ( ctgStack );
+ actg->ctgID = ctg;
+
+ if ( !prev_ctg )
+ {
+ actg->start = scafLen;
+ actg->end = actg->start + overlaplen + contig_array[ctg].length - 1;
+ }
+ else
+ {
+ gapN = starter - prev_start - ( int ) contig_array[prev_ctg].length;
+ gapN = gapN < 1 ? 1 : gapN;
+ actg->start = scafLen + gapN;
+ actg->end = actg->start + contig_array[ctg].length - 1;
+ }
+
+ //fprintf(stderr,"%d\t%d\n",actg->start,actg->end);
+ scafLen = actg->end + 1;
+ prev_ctg = ctg;
+ prev_start = starter;
+ }
+ }
+
+ if ( index )
+ {
+ aStack = ctgStackBuffer[scafInBuf++];
+ reverseStack ( aStack, ctgStack );
+ mapCtg2Scaf ( scafInBuf );
+ }
+
+ gapN = 0;
+
+ for ( ctg = 1; ctg <= num_ctg; ctg++ )
+ {
+ if ( contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1 )
+ {
+ continue;
+ }
+
+ gapN++;
+ }
+
+ fprintf ( stderr, "\nDone with %d scaffolds, %d contigs in Scaffolld\n", index, gapN );
+ /*
+ if(readSeqInGap)
+ freeDarray(readSeqInGap);
+ */
+ fclose ( fp );
+ freeStack ( ctgStack );
+ freeStackBuf ( ctgStackBuffer, scafBufSize );
+ free ( ( void * ) ctgStackBuffer );
}
static boolean contigElligible ( unsigned int contigno )
{
- unsigned int ctg = index_array[contigno];
-
- if ( contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1 )
- {
- return 0;
- }
- else
- {
- return 1;
- }
+ unsigned int ctg = index_array[contigno];
+
+ if ( contig_array[ctg].from_vt == 0 || contig_array[ctg].multi == 1 )
+ {
+ return 0;
+ }
+ else
+ {
+ return 1;
+ }
}
-static void output1read ( FILE * fo, long long readno, unsigned int contigno, int pos )
+static void output1read ( FILE *fo, long long readno, unsigned int contigno, int pos )
{
- unsigned int ctg = index_array[contigno];
- int posOnScaf;
- char orien;
- pos = pos < 0 ? 0 : pos;
-
- if ( contig_array[ctg].flag == 0 )
- {
- posOnScaf = contig_array[ctg].to_vt + pos - overlaplen;
- orien = '+';
- }
- else
- {
- posOnScaf = contig_array[ctg].to_vt + contig_array[ctg].length - pos;
- orien = '-';
- }
-
- /*
- if(readno==676)
- printf("Read %lld in region from %d, extend %d, pos %d, orien %c\n",
- readno,contig_array[ctg].to_vt,contig_array[ctg].length,posOnScaf,orien);
- */
- fprintf ( fo, "%lld\t%d\t%d\t%c\n", readno, contig_array[ctg].from_vt, posOnScaf, orien );
+ unsigned int ctg = index_array[contigno];
+ int posOnScaf;
+ char orien;
+ pos = pos < 0 ? 0 : pos;
+
+ if ( contig_array[ctg].flag == 0 )
+ {
+ posOnScaf = contig_array[ctg].to_vt + pos - overlaplen;
+ orien = '+';
+ }
+ else
+ {
+ posOnScaf = contig_array[ctg].to_vt + contig_array[ctg].length - pos;
+ orien = '-';
+ }
+
+ /*
+ if(readno==676)
+ printf("Read %lld in region from %d, extend %d, pos %d, orien %c\n",
+ readno,contig_array[ctg].to_vt,contig_array[ctg].length,posOnScaf,orien);
+ */
+ fprintf ( fo, "%lld\t%d\t%d\t%c\n", readno, contig_array[ctg].from_vt, posOnScaf, orien );
}
-void locateReadOnScaf ( char * graphfile )
+void locateReadOnScaf ( char *graphfile )
{
- char name[1024], line[1024];
- FILE * fp, *fo;
- long long readno, counter = 0, pre_readno = 0;
- unsigned int contigno, pre_contigno;
- int pre_pos, pos;
- locateContigOnscaff ( graphfile );
- sprintf ( name, "%s.readOnContig", graphfile );
- fp = ckopen ( name, "r" );
- sprintf ( name, "%s.readOnScaf", graphfile );
- fo = ckopen ( name, "w" );
-
- if ( !orig2new )
- {
- convertIndex ();
- orig2new = 1;
- }
-
- fgets ( line, 1024, fp );
-
- while ( fgets ( line, 1024, fp ) != NULL )
- {
- sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
-
- if ( ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) // they are a pair of reads
- && contigElligible ( pre_contigno ) && contigElligible ( contigno ) )
- {
- output1read ( fo, pre_readno, pre_contigno, pre_pos );
- output1read ( fo, readno, contigno, pos );
- counter++;
- }
-
- pre_readno = readno;
- pre_contigno = contigno;
- pre_pos = pos;
- }
-
- fprintf ( stderr, "%lld pair(s) on contig\n", counter );
- fclose ( fp );
- fclose ( fo );
+ char name[1024], line[1024];
+ FILE *fp, *fo;
+ long long readno, counter = 0, pre_readno = 0;
+ unsigned int contigno, pre_contigno;
+ int pre_pos, pos;
+ locateContigOnscaff ( graphfile );
+ sprintf ( name, "%s.readOnContig", graphfile );
+ fp = ckopen ( name, "r" );
+ sprintf ( name, "%s.readOnScaf", graphfile );
+ fo = ckopen ( name, "w" );
+
+ if ( !orig2new )
+ {
+ convertIndex ();
+ orig2new = 1;
+ }
+
+ fgets ( line, 1024, fp );
+
+ while ( fgets ( line, 1024, fp ) != NULL )
+ {
+ sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+
+ if ( ( readno % 2 == 0 ) && ( pre_readno == readno - 1 ) // they are a pair of reads
+ && contigElligible ( pre_contigno ) && contigElligible ( contigno ) )
+ {
+ output1read ( fo, pre_readno, pre_contigno, pre_pos );
+ output1read ( fo, readno, contigno, pos );
+ counter++;
+ }
+
+ pre_readno = readno;
+ pre_contigno = contigno;
+ pre_pos = pos;
+ }
+
+ fprintf ( stderr, "%lld pair(s) on contig\n", counter );
+ fclose ( fp );
+ fclose ( fo );
}
diff --git a/standardPregraph/readInterval.c b/standardPregraph/readInterval.c
index 362c54f..2509d8d 100644
--- a/standardPregraph/readInterval.c
+++ b/standardPregraph/readInterval.c
@@ -1,7 +1,7 @@
/*
* readInterval.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -30,26 +30,26 @@
void destroyReadIntervMem ()
{
- freeMem_manager ( rv_mem_manager );
- rv_mem_manager = NULL;
+ freeMem_manager ( rv_mem_manager );
+ rv_mem_manager = NULL;
}
-READINTERVAL * allocateRV ( int readid, int edgeid )
+READINTERVAL *allocateRV ( int readid, int edgeid )
{
- READINTERVAL * newRV;
- newRV = ( READINTERVAL * ) getItem ( rv_mem_manager );
- newRV->readid = readid;
- newRV->edgeid = edgeid;
- newRV->nextInRead = NULL;
- newRV->prevInRead = NULL;
- newRV->nextOnEdge = NULL;
- newRV->prevOnEdge = NULL;
- return newRV;
+ READINTERVAL *newRV;
+ newRV = ( READINTERVAL * ) getItem ( rv_mem_manager );
+ newRV->readid = readid;
+ newRV->edgeid = edgeid;
+ newRV->nextInRead = NULL;
+ newRV->prevInRead = NULL;
+ newRV->nextOnEdge = NULL;
+ newRV->prevOnEdge = NULL;
+ return newRV;
}
-void dismissRV ( READINTERVAL * rv )
+void dismissRV ( READINTERVAL *rv )
{
- returnItem ( rv_mem_manager, rv );
+ returnItem ( rv_mem_manager, rv );
}
/*************************************************
@@ -66,12 +66,12 @@ Return:
*************************************************/
void createRVmemo ()
{
- if ( !rv_mem_manager )
- {
- rv_mem_manager = createMem_manager ( RVBLOCKSIZE, sizeof ( READINTERVAL ) );
- }
- else
- {
- fprintf ( stderr, "Warning from createRVmemo: rv_mem_manager is an active pointer.\n" );
- }
+ if ( !rv_mem_manager )
+ {
+ rv_mem_manager = createMem_manager ( RVBLOCKSIZE, sizeof ( READINTERVAL ) );
+ }
+ else
+ {
+ fprintf ( stderr, "Warning from createRVmemo: rv_mem_manager is an active pointer.\n" );
+ }
}
diff --git a/standardPregraph/readseq1by1.c b/standardPregraph/readseq1by1.c
index 9d0e655..4790bd6 100644
--- a/standardPregraph/readseq1by1.c
+++ b/standardPregraph/readseq1by1.c
@@ -1,7 +1,7 @@
/*
* readseq1by1.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -47,163 +47,165 @@ boolean change = 1;
//static boolean multi=0;
//atic long curr_pos;
-void readseq1by1 ( char * src_seq, char * src_name, int * len_seq, FILE * fp, long long num_seq )
+void readseq1by1 ( char *src_seq, char *src_name, int *len_seq, FILE *fp, long long num_seq )
{
- int i, k, n, strL;
- char c;
- int lineLen = gLineLen;
- char tmpStr[lineLen];
- char * str;
-
- if ( gStr == NULL )
- {
- str = tmpStr;
- }
- else
- {
- str = gStr;
- lineLen = maxReadLen + 1;
- }
-
- n = 0;
- k = num_seq;
-
- while ( fgets ( str, lineLen, fp ) )
- {
- if ( str[0] == '#' )
- {
- continue;
- }
-
- if ( str[0] == '>' )
- {
- /*
- if(k >= 0) { // if this isn't the first '>' in the file
- *len_seq = n;
- }
- */
- /*
- if(multi)
- {
- curr_pos=ftell(fp);
- }
- */
- *len_seq = n;
- n = 0;
- sscanf ( &str[1], "%s", src_name );
- return;
- }
- else
- {
- strL = strlen ( str );
-
- if ( strL + n > maxReadLen )
- {
- strL = maxReadLen - n;
- }
-
- for ( i = 0; i < strL; i++ )
- {
- if ( str[i] >= 'a' && str[i] <= 'z' )
- {
- c = base2int ( str[i] - 'a' + 'A' );
- src_seq[n++] = c;
- }
- else if ( str[i] >= 'A' && str[i] <= 'Z' )
- {
- c = base2int ( str[i] );
- src_seq[n++] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( str[i] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[n++] = c;
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
-
- //printf("%d: %d\n",k,n);
- }
- }
-
- if ( k >= 0 )
- {
- *len_seq = n;
- return;
- }
-
- *len_seq = 0;
+ int i, k, n, strL;
+ char c;
+ int lineLen = gLineLen;
+ char tmpStr[lineLen];
+ char *str;
+
+ if ( gStr == NULL )
+ {
+ str = tmpStr;
+ }
+ else
+ {
+ str = gStr;
+ lineLen = maxReadLen + 1;
+ }
+
+ n = 0;
+ k = num_seq;
+
+ while ( fgets ( str, lineLen, fp ) )
+ {
+ if ( str[0] == '#' )
+ {
+ continue;
+ }
+
+ if ( str[0] == '>' )
+ {
+ /*
+ if(k >= 0) { // if this isn't the first '>' in the file
+ *len_seq = n;
+ }
+ */
+ /*
+ if(multi)
+ {
+ curr_pos=ftell(fp);
+ }
+ */
+ *len_seq = n;
+ n = 0;
+ sscanf ( &str[1], "%s", src_name );
+ return;
+ }
+ else
+ {
+ strL = strlen ( str );
+
+ if ( strL + n > maxReadLen )
+ {
+ strL = maxReadLen - n;
+ }
+
+ for ( i = 0; i < strL; i++ )
+ {
+ if ( str[i] >= 'a' && str[i] <= 'z' )
+ {
+ c = base2int ( str[i] - 'a' + 'A' );
+ src_seq[n++] = c;
+ }
+ else if ( str[i] >= 'A' && str[i] <= 'Z' )
+ {
+ c = base2int ( str[i] );
+ src_seq[n++] = c;
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( str[i] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[n++] = c;
+ } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+
+ //printf("%d: %d\n",k,n);
+ }
+ }
+
+ if ( k >= 0 )
+ {
+ *len_seq = n;
+ return;
+ }
+
+ *len_seq = 0;
}
-void readseqInBuf ( char * src_seq, char * src_name, int * len_seq, char * buf, int * start, int offset )
+void readseqInBuf ( char *src_seq, char *src_name, int *len_seq, char *buf, int *start, int offset )
{
- int i, n, strL, m, p;
- char c;
- int lineLen = gLineLen;
- char tmpStr[lineLen];
- char * str;
-
- if ( gStr == NULL )
- {
- str = tmpStr;
- }
- else
- {
- str = gStr;
- lineLen = maxReadLen + 1;
- }
-
- n = 0;
-
- for ( m = *start; m < offset; m++ )
- {
- if ( buf[m] == '>' )
- {
- p = m;
- }
- else if ( buf[m] == '\n' && buf[p] == '>' )
- {
- memcpy ( src_name, &buf[p + 1], m - p - 1 ); //get name
- p = m;
- }
- else if ( buf[m] == '\n' && buf[p] == '\n' )
- {
- memcpy ( str, &buf[p + 1], m - p - 1 ); //get seq
- //p = m;
- str[m - p - 1] = '\0';
- *start = m + 1;
- strL = strlen ( str );
-
- if ( strL + n > maxReadLen )
- { strL = maxReadLen - n; }
-
- for ( i = 0; i < strL; i++ )
- {
- if ( str[i] >= 'a' && str[i] <= 'z' )
- {
- c = base2int ( str[i] - 'a' + 'A' );
- src_seq[n++] = c;
- }
- else if ( str[i] >= 'A' && str[i] <= 'Z' )
- {
- c = base2int ( str[i] );
- src_seq[n++] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( str[i] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[n++] = c;
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
-
- break;
- //printf("%d: %d\n",k,n);
- }
- }
-
- *len_seq = n;
- return;
+ int i, n, strL, m, p;
+ char c;
+ int lineLen = gLineLen;
+ char tmpStr[lineLen];
+ char *str;
+
+ if ( gStr == NULL )
+ {
+ str = tmpStr;
+ }
+ else
+ {
+ str = gStr;
+ lineLen = maxReadLen + 1;
+ }
+
+ n = 0;
+
+ for ( m = *start; m < offset; m++ )
+ {
+ if ( buf[m] == '>' )
+ {
+ p = m;
+ }
+ else if ( buf[m] == '\n' && buf[p] == '>' )
+ {
+ memcpy ( src_name, &buf[p + 1], m - p - 1 ); //get name
+ p = m;
+ }
+ else if ( buf[m] == '\n' && buf[p] == '\n' )
+ {
+ memcpy ( str, &buf[p + 1], m - p - 1 ); //get seq
+ //p = m;
+ str[m - p - 1] = '\0';
+ *start = m + 1;
+ strL = strlen ( str );
+
+ if ( strL + n > maxReadLen )
+ {
+ strL = maxReadLen - n;
+ }
+
+ for ( i = 0; i < strL; i++ )
+ {
+ if ( str[i] >= 'a' && str[i] <= 'z' )
+ {
+ c = base2int ( str[i] - 'a' + 'A' );
+ src_seq[n++] = c;
+ }
+ else if ( str[i] >= 'A' && str[i] <= 'Z' )
+ {
+ c = base2int ( str[i] );
+ src_seq[n++] = c;
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( str[i] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[n++] = c;
+ } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+
+ break;
+ //printf("%d: %d\n",k,n);
+ }
+ }
+
+ *len_seq = n;
+ return;
}
/*************************************************
@@ -220,1037 +222,1109 @@ Output:
Return:
The contig number.
*************************************************/
-long long readseqpar ( int * max_leg, int * min_leg, int * max_name_leg, FILE * fp )
+long long readseqpar ( int *max_leg, int *min_leg, int *max_name_leg, FILE *fp )
{
- int l, n;
- long long k;
- char str[5000], src_name[5000];
- n = 0;
- k = -1;
-
- while ( fgets ( str, 4950, fp ) )
- {
- if ( str[0] == '>' )
- {
- if ( k >= 0 )
- {
- if ( n > *max_leg )
- {
- *max_leg = n;
- }
-
- if ( n < *min_leg )
- {
- *min_leg = n;
- }
- }
-
- n = 0;
- k++;
- sscanf ( &str[1], "%s", src_name );
-
- if ( ( l = strlen ( src_name ) ) > *max_name_leg )
- {
- *max_name_leg = l;
- }
- }
- else
- {
- n += strlen ( str ) - 1;
- }
- }
-
- if ( n > *max_leg )
- {
- *max_leg = n;
- }
-
- if ( n < *min_leg )
- {
- *min_leg = n;
- }
-
- k++;
- return ( k );
+ int l, n;
+ long long k;
+ char str[5000], src_name[5000];
+ n = 0;
+ k = -1;
+
+ while ( fgets ( str, 4950, fp ) )
+ {
+ if ( str[0] == '>' )
+ {
+ if ( k >= 0 )
+ {
+ if ( n > *max_leg )
+ {
+ *max_leg = n;
+ }
+
+ if ( n < *min_leg )
+ {
+ *min_leg = n;
+ }
+ }
+
+ n = 0;
+ k++;
+ sscanf ( &str[1], "%s", src_name );
+
+ if ( ( l = strlen ( src_name ) ) > *max_name_leg )
+ {
+ *max_name_leg = l;
+ }
+ }
+ else
+ {
+ n += strlen ( str ) - 1;
+ }
+ }
+
+ if ( n > *max_leg )
+ {
+ *max_leg = n;
+ }
+
+ if ( n < *min_leg )
+ {
+ *min_leg = n;
+ }
+
+ k++;
+ return ( k );
}
-void readseqfq ( char * src_seq, char * src_name, int * len_seq, char * buf, int * start, int offset )
+void readseqfq ( char *src_seq, char *src_name, int *len_seq, char *buf, int *start, int offset )
{
- int i, n, strL, m, p = 0;
- char c;
- int lineLen = gLineLen;
- char tmpStr[lineLen];
- char * str;
-
- if ( gStr == NULL )
- {
- str = tmpStr;
- }
- else
- {
- str = gStr;
- lineLen = maxReadLen + 1;
- }
-
- n = 0;
-
- for ( m = *start; m < offset; m++ )
- {
- if ( buf[m] == '@' )
- { p = m; }
- else if ( buf[m] == '\n' && buf[p] == '@' )
- {
- memcpy ( src_name, &buf[p + 1], m - p - 1 );
- p = m;
- }
- else if ( buf[m] == '\n' && buf[p] == '\n' && m > p )
- {
- //p = m;
- memcpy ( str, &buf[p + 1], m - p - 1 );
- str[m - p - 1] = '\0';
- strL = strlen ( str );
-
- if ( strL + n > maxReadLen )
- { strL = maxReadLen - n; }
-
- for ( i = 0; i < strL; i++ )
- {
- if ( str[i] >= 'a' && str[i] <= 'z' )
- {
- c = base2int ( str[i] - 'a' + 'A' );
- src_seq[n++] = c;
- }
- else if ( str[i] >= 'A' && str[i] <= 'Z' )
- {
- c = base2int ( str[i] );
- src_seq[n++] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( str[i] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[n++] = c;
- }
- }
-
- for ( m++; buf[m] != '\n'; m++ ) { ; }
-
- *start = ( m + 2 + strlen ( str ) );
- break;
- }
-
- /* else if (buf[m] == '\n' && buf[p] == '+' && buf[m - 1] != '+')
- {
- *start = m + 1;
- break;
- }*/
- }
-
- *len_seq = n;
- return;
+ int i, n, strL, m, p = 0;
+ char c;
+ int lineLen = gLineLen;
+ char tmpStr[lineLen];
+ char *str;
+
+ if ( gStr == NULL )
+ {
+ str = tmpStr;
+ }
+ else
+ {
+ str = gStr;
+ lineLen = maxReadLen + 1;
+ }
+
+ n = 0;
+
+ for ( m = *start; m < offset; m++ )
+ {
+ if ( buf[m] == '@' )
+ {
+ p = m;
+ }
+ else if ( buf[m] == '\n' && buf[p] == '@' )
+ {
+ memcpy ( src_name, &buf[p + 1], m - p - 1 );
+ p = m;
+ }
+ else if ( buf[m] == '\n' && buf[p] == '\n' && m > p )
+ {
+ //p = m;
+ memcpy ( str, &buf[p + 1], m - p - 1 );
+ str[m - p - 1] = '\0';
+ strL = strlen ( str );
+
+ if ( strL + n > maxReadLen )
+ {
+ strL = maxReadLen - n;
+ }
+
+ for ( i = 0; i < strL; i++ )
+ {
+ if ( str[i] >= 'a' && str[i] <= 'z' )
+ {
+ c = base2int ( str[i] - 'a' + 'A' );
+ src_seq[n++] = c;
+ }
+ else if ( str[i] >= 'A' && str[i] <= 'Z' )
+ {
+ c = base2int ( str[i] );
+ src_seq[n++] = c;
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( str[i] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[n++] = c;
+ }
+ }
+
+ for ( m++; buf[m] != '\n'; m++ )
+ {
+ ;
+ }
+
+ *start = ( m + 2 + strlen ( str ) );
+ break;
+ }
+
+ /* else if (buf[m] == '\n' && buf[p] == '+' && buf[m - 1] != '+')
+ {
+ *start = m + 1;
+ break;
+ }*/
+ }
+
+ *len_seq = n;
+ return;
}
-void read1seqfq ( char * src_seq, char * src_name, int * len_seq, FILE * fp )
+void read1seqfq ( char *src_seq, char *src_name, int *len_seq, FILE *fp )
{
- int i, n, strL;
- char c;
- int lineLen = gLineLen;
- char tmpStr[lineLen];
- char * str;
-
- if ( gStr == NULL )
- {
- str = tmpStr;
- }
- else
- {
- str = gStr;
- lineLen = maxReadLen + 1;
- }
-
- boolean flag = 0;
-
- while ( fgets ( str, lineLen, fp ) )
- {
- if ( str[0] == '@' )
- {
- flag = 1;
- sscanf ( &str[1], "%s", src_name );
- break;
- }
- }
-
- if ( !flag ) //last time reading fq file get this
- {
- *len_seq = 0;
- return;
- }
-
- /*
- if(multi)
- {
- curr_pos=ftell(fp);
- }
- */
- n = 0;
-
- while ( fgets ( str, lineLen, fp ) )
- {
- if ( str[0] == '+' )
- {
- fgets ( str, lineLen, fp ); // pass quality value line
- *len_seq = n;
- return;
- }
- else
- {
- strL = strlen ( str );
-
- if ( strL + n > maxReadLen )
- {
- strL = maxReadLen - n;
- }
-
- for ( i = 0; i < strL; i++ )
- {
- if ( str[i] >= 'a' && str[i] <= 'z' )
- {
- c = base2int ( str[i] - 'a' + 'A' );
- src_seq[n++] = c;
- }
- else if ( str[i] >= 'A' && str[i] <= 'Z' )
- {
- c = base2int ( str[i] );
- src_seq[n++] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( str[i] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[n++] = c;
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- }
- }
-
- *len_seq = n;
- return;
+ int i, n, strL;
+ char c;
+ int lineLen = gLineLen;
+ char tmpStr[lineLen];
+ char *str;
+
+ if ( gStr == NULL )
+ {
+ str = tmpStr;
+ }
+ else
+ {
+ str = gStr;
+ lineLen = maxReadLen + 1;
+ }
+
+ boolean flag = 0;
+
+ while ( fgets ( str, lineLen, fp ) )
+ {
+ if ( str[0] == '@' )
+ {
+ flag = 1;
+ sscanf ( &str[1], "%s", src_name );
+ break;
+ }
+ }
+
+ if ( !flag ) //last time reading fq file get this
+ {
+ *len_seq = 0;
+ return;
+ }
+
+ /*
+ if(multi)
+ {
+ curr_pos=ftell(fp);
+ }
+ */
+ n = 0;
+
+ while ( fgets ( str, lineLen, fp ) )
+ {
+ if ( str[0] == '+' )
+ {
+ fgets ( str, lineLen, fp ); // pass quality value line
+ *len_seq = n;
+ return;
+ }
+ else
+ {
+ strL = strlen ( str );
+
+ if ( strL + n > maxReadLen )
+ {
+ strL = maxReadLen - n;
+ }
+
+ for ( i = 0; i < strL; i++ )
+ {
+ if ( str[i] >= 'a' && str[i] <= 'z' )
+ {
+ c = base2int ( str[i] - 'a' + 'A' );
+ src_seq[n++] = c;
+ }
+ else if ( str[i] >= 'A' && str[i] <= 'Z' )
+ {
+ c = base2int ( str[i] );
+ src_seq[n++] = c;
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( str[i] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[n++] = c;
+ } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ }
+ }
+
+ *len_seq = n;
+ return;
}
-void read1seqbam ( char * src_seq, char * src_name, int * len_seq, samfile_t * in, int * type, int asm_flag ) //read one sequence from bam file
+void read1seqbam ( char *src_seq, char *src_name, int *len_seq, samfile_t *in, int *type, int asm_flag ) //read one sequence from bam file
{
- bam1_t * b = bam_init1 ();
- char c;
- char * line1 = NULL;
- int n = 0;
- int len;
- int i, j;
- char * seq1;
- unsigned int flag1 = 0;
- *type = 0;
- readstate = 0;
- boolean isGood = true;
-
- // if ((readstate = samread (in, b)) >= 0)
- while ( ( readstate = samread ( in, b ) ) >= 0 )
- {
- if ( !__g_skip_aln ( in->header, b ) )
- {
- line1 = bam_format1_core ( in->header, b, in->type >> 2 & 3 );
- }
-
- seq1 = strtok ( line1, "\t" );
-
- for ( i = 0; i < 10; i++ )
- {
- if ( i == 0 )
- {
- sscanf ( seq1, "%s", src_name );
- }
- else if ( i == 1 )
- {
- flag1 = atoi ( seq1 );
-
- if ( flag1 & 0x0200 ) //whether it's good or not
- {
- if ( asm_flag == 1 )
- {
- isGood = false;
- break;
- }
-
- switch ( state )
- {
- case -3:
- state = -2;
- break;
- case -2:
- state = 0;
- break;
- case -1:
- state = 2;
- break;
- default:
- state = -3;
- }
- }
- else
- {
- isGood = true;
-
- switch ( state )
- {
- case -3:
- state = -1;
- break;
- case -2:
- state = 1;
- break;
- case -1:
- state = 3;
- break;
- default:
- state = -3;
- }
- }
- }
- else if ( i == 9 ) //the sequence
- {
- len = strlen ( seq1 );
-
- if ( len + n > maxReadLen )
- { len = maxReadLen - n; }
-
- for ( j = 0; j < len; j++ )
- {
- if ( seq1[j] >= 'a' && seq1[j] <= 'z' )
- {
- c = base2int ( seq1[j] - 'a' + 'A' );
- src_seq[n++] = c;
- }
- else if ( seq1[j] >= 'A' && seq1[j] <= 'Z' )
- {
- c = base2int ( seq1[j] );
- src_seq[n++] = c;
- // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
- else if ( seq1[j] == '.' )
- {
- c = base2int ( 'A' );
- src_seq[n++] = c;
- } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
- }
-
- if ( 3 == state )
- {
- state = -3;
- }
- else
- {
- if ( 0 == state || 1 == state || 2 == state )
- {
- state = -3;
- *type = -1;
- }
- }
- }
-
- seq1 = strtok ( NULL, "\t" );
- }
-
- if ( isGood )
- { break; }
- }
-
- if ( readstate < 0 )
- { state = -3; }
-
- free ( line1 );
- bam_destroy1 ( b );
- *len_seq = n;
+ bam1_t *b = bam_init1 ();
+ char c;
+ char *line1 = NULL;
+ int n = 0;
+ int len;
+ int i, j;
+ char *seq1;
+ unsigned int flag1 = 0;
+ *type = 0;
+ readstate = 0;
+ boolean isGood = true;
+
+ // if ((readstate = samread (in, b)) >= 0)
+ while ( ( readstate = samread ( in, b ) ) >= 0 )
+ {
+ if ( !__g_skip_aln ( in->header, b ) )
+ {
+ line1 = bam_format1_core ( in->header, b, in->type >> 2 & 3 );
+ }
+
+ seq1 = strtok ( line1, "\t" );
+
+ for ( i = 0; i < 10; i++ )
+ {
+ if ( i == 0 )
+ {
+ sscanf ( seq1, "%s", src_name );
+ }
+ else if ( i == 1 )
+ {
+ flag1 = atoi ( seq1 );
+
+ if ( flag1 & 0x0200 ) //whether it's good or not
+ {
+ if ( asm_flag == 1 )
+ {
+ isGood = false;
+ break;
+ }
+
+ switch ( state )
+ {
+ case -3:
+ state = -2;
+ break;
+
+ case -2:
+ state = 0;
+ break;
+
+ case -1:
+ state = 2;
+ break;
+
+ default:
+ state = -3;
+ }
+ }
+ else
+ {
+ isGood = true;
+
+ switch ( state )
+ {
+ case -3:
+ state = -1;
+ break;
+
+ case -2:
+ state = 1;
+ break;
+
+ case -1:
+ state = 3;
+ break;
+
+ default:
+ state = -3;
+ }
+ }
+ }
+ else if ( i == 9 ) //the sequence
+ {
+ len = strlen ( seq1 );
+
+ if ( len + n > maxReadLen )
+ {
+ len = maxReadLen - n;
+ }
+
+ for ( j = 0; j < len; j++ )
+ {
+ if ( seq1[j] >= 'a' && seq1[j] <= 'z' )
+ {
+ c = base2int ( seq1[j] - 'a' + 'A' );
+ src_seq[n++] = c;
+ }
+ else if ( seq1[j] >= 'A' && seq1[j] <= 'Z' )
+ {
+ c = base2int ( seq1[j] );
+ src_seq[n++] = c;
+ // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+ else if ( seq1[j] == '.' )
+ {
+ c = base2int ( 'A' );
+ src_seq[n++] = c;
+ } // after pre-process all the symbles would be a,g,c,t,n in lower or upper case.
+ }
+
+ if ( 3 == state )
+ {
+ state = -3;
+ }
+ else
+ {
+ if ( 0 == state || 1 == state || 2 == state )
+ {
+ state = -3;
+ *type = -1;
+ }
+ }
+ }
+
+ seq1 = strtok ( NULL, "\t" );
+ }
+
+ if ( isGood )
+ {
+ break;
+ }
+ }
+
+ if ( readstate < 0 )
+ {
+ state = -3;
+ }
+
+ free ( line1 );
+ bam_destroy1 ( b );
+ *len_seq = n;
}
// find the next file to open in libs
int nextValidIndex ( int libNo, boolean pair, unsigned char asm_ctg )
{
- int i = libNo;
-
- while ( i < num_libs )
- {
- if ( asm_ctg == 1 && ( lib_array[i].asm_flag != 1 && lib_array[i].asm_flag != 3 ) )
- {
- i++;
- continue;
- }
- else if ( asm_ctg == 0 && ( lib_array[i].asm_flag != 2 && lib_array[i].asm_flag != 3 ) )
- {
- i++;
- continue;
- }
- else if ( asm_ctg > 1 && lib_array[i].asm_flag != asm_ctg ) // reads for other purpose
- {
- i++;
- continue;
- }
-
- if ( lib_array[i].curr_type == 1 && lib_array[i].curr_index < lib_array[i].num_a1_file )
- {
- return i;
- }
-
- if ( lib_array[i].curr_type == 2 && lib_array[i].curr_index < lib_array[i].num_q1_file )
- {
- return i;
- }
-
- if ( lib_array[i].curr_type == 3 && lib_array[i].curr_index < lib_array[i].num_p_file )
- {
- return i;
- }
-
- if ( lib_array[i].curr_type == 4 && lib_array[i].curr_index < lib_array[i].num_b_file )
- {
- return i;
- }
-
- if ( pair )
- {
- if ( lib_array[i].curr_type < 4 )
- {
- lib_array[i].curr_type++;
- lib_array[i].curr_index = 0;
- }
- else
- {
- i++;
- }
-
- continue;
- }
-
- if ( lib_array[i].curr_type == 5 && lib_array[i].curr_index < lib_array[i].num_s_a_file )
- {
- return i;
- }
-
- if ( lib_array[i].curr_type == 6 && lib_array[i].curr_index < lib_array[i].num_s_q_file )
- {
- return i;
- }
-
- if ( lib_array[i].curr_type < 6 )
- {
- lib_array[i].curr_type++;
- lib_array[i].curr_index = 0;
- }
- else
- {
- i++;
- }
- } //for each lib
-
- return i;
+ int i = libNo; // nextValidIndex: starting at library libNo, return the index of the first library that still has a file left to open for the requested purpose; returns a value >= num_libs when nothing is left. Side effect: advances curr_type / resets curr_index of the libraries it walks over.
+
+ while ( i < num_libs )
+ {
+ if ( asm_ctg == 1 && ( lib_array[i].asm_flag != 1 && lib_array[i].asm_flag != 3 ) ) // asm_ctg 1 only accepts libraries whose asm_flag is 1 or 3
+ {
+ i++;
+ continue;
+ }
+ else if ( asm_ctg == 0 && ( lib_array[i].asm_flag != 2 && lib_array[i].asm_flag != 3 ) ) // asm_ctg 0 only accepts libraries whose asm_flag is 2 or 3
+ {
+ i++;
+ continue;
+ }
+ else if ( asm_ctg > 1 && lib_array[i].asm_flag != asm_ctg ) // reads for other purpose: any asm_ctg above 1 must match asm_flag exactly
+ {
+ i++;
+ continue;
+ }
+
+ if ( lib_array[i].curr_type == 1 && lib_array[i].curr_index < lib_array[i].num_a1_file ) // type 1: a1 files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( lib_array[i].curr_type == 2 && lib_array[i].curr_index < lib_array[i].num_q1_file ) // type 2: q1 files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( lib_array[i].curr_type == 3 && lib_array[i].curr_index < lib_array[i].num_p_file ) // type 3: p files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( lib_array[i].curr_type == 4 && lib_array[i].curr_index < lib_array[i].num_b_file ) // type 4: b (BAM) files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( pair ) // with pair set, only file types 1..4 are ever considered; types 5 and 6 below are skipped
+ {
+ if ( lib_array[i].curr_type < 4 )
+ {
+ lib_array[i].curr_type++; // move on to the next file type of this library
+ lib_array[i].curr_index = 0; // and restart at its first file
+ }
+ else
+ {
+ i++; // types 1..4 exhausted: try the next library
+ }
+
+ continue;
+ }
+
+ if ( lib_array[i].curr_type == 5 && lib_array[i].curr_index < lib_array[i].num_s_a_file ) // type 5: s_a files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( lib_array[i].curr_type == 6 && lib_array[i].curr_index < lib_array[i].num_s_q_file ) // type 6: s_q files not yet exhausted
+ {
+ return i;
+ }
+
+ if ( lib_array[i].curr_type < 6 )
+ {
+ lib_array[i].curr_type++; // move on to the next file type of this library
+ lib_array[i].curr_index = 0; // and restart at its first file
+ }
+ else
+ {
+ i++; // every file type of this library is exhausted
+ }
+ } //for each lib
+
+ return i; // loop ended without a hit: i is >= num_libs here, which callers treat as "no more input"
}
-static FILE * openFile4read ( char * fname )
+static FILE *openFile4read ( char *fname )
{
- FILE * fp;
- int i, j = 0;
- char str_sub[4];
-
- for ( i = strlen ( fname ) - 1; i >= 0; i-- )
- {
- if ( fname[i] == ' ' ) { j++; }
- else { break; }
- }
-
- for ( i = 0; i < 3; i++ ) { str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; }
-
- str_sub[3] = '\0';
-
- if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 )
- {
- char * cmd = ( char * ) ckalloc ( ( strlen ( fname ) + 20 ) * sizeof ( char ) );
- sprintf ( cmd, "gzip -dc %s", fname );
- fp = popen ( cmd, "r" );
- free ( cmd );
- return fp;
- }
- else
- {
- fname[strlen ( fname ) - j] = '\0';
- return ckopen ( fname, "r" );
- }
+ FILE *fp; // openFile4read: open fname for reading, ignoring trailing spaces in the name; names ending in ".gz" are read through a "gzip -dc" pipe (popen), everything else through ckopen
+ int i, j = 0; // j counts the trailing spaces of fname
+ char str_sub[4]; // holds the last three non-space characters of fname plus a terminator
+
+ for ( i = strlen ( fname ) - 1; i >= 0; i-- ) // scan backwards over trailing spaces
+ {
+ if ( fname[i] == ' ' )
+ {
+ j++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ for ( i = 0; i < 3; i++ )
+ {
+ str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; // NOTE(review): evaluated before the length test below; when fname has fewer than 3 non-space characters the unsigned index wraps and this reads outside the string
+ }
+
+ str_sub[3] = '\0';
+
+ if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 ) // the length test counts trailing spaces too
+ {
+ char *cmd = ( char * ) ckalloc ( ( strlen ( fname ) + 20 ) * sizeof ( char ) ); // room for the "gzip -dc " prefix, the name and the terminator
+ sprintf ( cmd, "gzip -dc %s", fname ); // NOTE(review): fname is pasted into a shell command unquoted; names containing spaces or shell metacharacters will not behave as a plain path
+ fp = popen ( cmd, "r" ); // NOTE(review): result is returned unchecked, so callers may receive NULL; the stream must later be closed with pclose, not fclose
+ free ( cmd );
+ return fp;
+ }
+ else
+ {
+ fname[strlen ( fname ) - j] = '\0'; // strip the trailing spaces in place (modifies the caller's string)
+ return ckopen ( fname, "r" ); // ckopen is defined elsewhere; its failure behaviour is not visible here
+ }
}
-static samfile_t * openFile4readb ( char * fname ) //open file to read bam file
+static samfile_t *openFile4readb ( char *fname ) //open file to read bam file
{
- samfile_t * in;
- char * fn_list = 0;
-
- if ( ( in = ( samfile_t * ) samopen ( fname, "rb", fn_list ) ) == 0 )
- {
- fprintf ( stderr, "Cannot open %s. Now exit to system...\n", fname );
- exit ( -1 );
- }
-
- if ( in->header == 0 )
- {
- fprintf ( stderr, "Cannot read the header.\n" );
- exit ( -1 );
- }
-
- return ( in );
+ samfile_t *in; // openFile4readb: open fname with samopen in "rb" mode and return the handle; terminates the process with exit(-1) if the file cannot be opened or carries no header
+ char *fn_list = 0; // null auxiliary argument handed to samopen
+
+ if ( ( in = ( samfile_t * ) samopen ( fname, "rb", fn_list ) ) == 0 )
+ {
+ fprintf ( stderr, "Cannot open %s. Now exit to system...\n", fname );
+ exit ( -1 );
+ }
+
+ if ( in->header == 0 ) // an opened file without a header is treated as fatal as well
+ {
+ fprintf ( stderr, "Cannot read the header.\n" );
+ exit ( -1 );
+ }
+
+ return ( in );
}
void openFileInLib ( int libNo )
{
- int i = libNo;
-
- if ( lib_array[i].curr_type == 1 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].a1_fname[lib_array[i].curr_index] );
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].a2_fname[lib_array[i].curr_index] );
- lib_array[i].fp1 = openFile4read ( lib_array[i].a1_fname[lib_array[i].curr_index] );
- lib_array[i].fp2 = openFile4read ( lib_array[i].a2_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 1;
- }
- else if ( lib_array[i].curr_type == 2 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].q1_fname[lib_array[i].curr_index] );
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].q2_fname[lib_array[i].curr_index] );
- lib_array[i].fp1 = openFile4read ( lib_array[i].q1_fname[lib_array[i].curr_index] );
- lib_array[i].fp2 = openFile4read ( lib_array[i].q2_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 1;
- }
- else if ( lib_array[i].curr_type == 3 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].p_fname[lib_array[i].curr_index] );
- lib_array[i].fp1 = openFile4read ( lib_array[i].p_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 0;
- }
- else if ( lib_array[i].curr_type == 5 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].s_a_fname[lib_array[i].curr_index] );
- lib_array[i].fp1 = openFile4read ( lib_array[i].s_a_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 0;
- }
- else if ( lib_array[i].curr_type == 6 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].s_q_fname[lib_array[i].curr_index] );
- lib_array[i].fp1 = openFile4read ( lib_array[i].s_q_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 0;
- }
- else if ( lib_array[i].curr_type == 4 )
- {
- fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].b_fname[lib_array[i].curr_index] );
- lib_array[i].fp3 = openFile4readb ( lib_array[i].b_fname[lib_array[i].curr_index] );
- lib_array[i].curr_index++;
- lib_array[i].paired = 0;
- }
+ int i = libNo; // openFileInLib: open the file(s) at curr_index for library libNo according to its curr_type, log the name(s) to stderr, advance curr_index and set the paired flag; does nothing for a curr_type outside 1..6
+
+ if ( lib_array[i].curr_type == 1 ) // type 1: two parallel files, a1 -> fp1 and a2 -> fp2
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].a1_fname[lib_array[i].curr_index] );
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].a2_fname[lib_array[i].curr_index] );
+ lib_array[i].fp1 = openFile4read ( lib_array[i].a1_fname[lib_array[i].curr_index] );
+ lib_array[i].fp2 = openFile4read ( lib_array[i].a2_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++; // curr_index now points one past the file just opened
+ lib_array[i].paired = 1; // 1 = the next read comes from fp1
+ }
+ else if ( lib_array[i].curr_type == 2 ) // type 2: two parallel files, q1 -> fp1 and q2 -> fp2
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].q1_fname[lib_array[i].curr_index] );
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].q2_fname[lib_array[i].curr_index] );
+ lib_array[i].fp1 = openFile4read ( lib_array[i].q1_fname[lib_array[i].curr_index] );
+ lib_array[i].fp2 = openFile4read ( lib_array[i].q2_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++;
+ lib_array[i].paired = 1;
+ }
+ else if ( lib_array[i].curr_type == 3 ) // type 3: a single p file on fp1
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].p_fname[lib_array[i].curr_index] );
+ lib_array[i].fp1 = openFile4read ( lib_array[i].p_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++;
+ lib_array[i].paired = 0;
+ }
+ else if ( lib_array[i].curr_type == 5 ) // type 5: a single s_a file on fp1
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].s_a_fname[lib_array[i].curr_index] );
+ lib_array[i].fp1 = openFile4read ( lib_array[i].s_a_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++;
+ lib_array[i].paired = 0;
+ }
+ else if ( lib_array[i].curr_type == 6 ) // type 6: a single s_q file on fp1
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].s_q_fname[lib_array[i].curr_index] );
+ lib_array[i].fp1 = openFile4read ( lib_array[i].s_q_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++;
+ lib_array[i].paired = 0;
+ }
+ else if ( lib_array[i].curr_type == 4 ) // type 4: a BAM file, opened with openFile4readb onto fp3 (fp1/fp2 untouched)
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", lib_array[i].b_fname[lib_array[i].curr_index] );
+ lib_array[i].fp3 = openFile4readb ( lib_array[i].b_fname[lib_array[i].curr_index] );
+ lib_array[i].curr_index++;
+ lib_array[i].paired = 0;
+ }
}
-static void reverse2k ( char * src_seq, int len_seq )
+static void reverse2k ( char *src_seq, int len_seq )
{
- if ( !len_seq )
- {
- return;
- }
-
- int i;
- reverseComplementSeq ( src_seq, len_seq, src_rc_seq );
-
- for ( i = 0; i < len_seq; i++ )
- {
- src_seq[i] = src_rc_seq[i];
- }
+ if ( !len_seq ) // reverse2k: overwrite src_seq[0..len_seq) with its reverse complement; nothing to do for an empty sequence
+ {
+ return;
+ }
+
+ int i;
+ reverseComplementSeq ( src_seq, len_seq, src_rc_seq ); // result goes to the file-level scratch buffer src_rc_seq (declared elsewhere; assumed to hold at least len_seq entries - TODO confirm)
+
+ for ( i = 0; i < len_seq; i++ ) // copy the result back over the input
+ {
+ src_seq[i] = src_rc_seq[i];
+ }
}
void closeFp1InLab ( int libNo )
{
- int ftype = lib_array[libNo].curr_type;
- int index = lib_array[libNo].curr_index - 1;
- char * fname;
- int i, j = 0;
- char str_sub[4];
-
- if ( ftype == 1 )
- {
- fname = lib_array[libNo].a1_fname[index];
- }
- else if ( ftype == 2 )
- {
- fname = lib_array[libNo].q1_fname[index];
- }
- else if ( ftype == 3 )
- {
- fname = lib_array[libNo].p_fname[index];
- }
- else if ( ftype == 5 )
- {
- fname = lib_array[libNo].s_a_fname[index];
- }
- else if ( ftype == 6 )
- {
- fname = lib_array[libNo].s_q_fname[index];
- }
- else if ( ftype == 4 )
- {
- fname = lib_array[libNo].b_fname[index];
- }
- else
- {
- return;
- }
-
- if ( ftype == 4 )
- {
- samclose ( lib_array[libNo].fp3 ); //close file3
- }
- else
- {
- for ( i = strlen ( fname ) - 1; i >= 0; i-- )
- {
- if ( fname[i] == ' ' ) { j++; }
- else { break; }
- }
-
- for ( i = 0; i < 3; i++ ) { str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; }
-
- str_sub[3] = '\0';
-
- if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 )
- {
- pclose ( lib_array[libNo].fp1 );
- }
- else
- {
- fclose ( lib_array[libNo].fp1 );
- }
- }
+ int ftype = lib_array[libNo].curr_type; // closeFp1InLab: close the primary input of library libNo - samclose on fp3 for type 4, otherwise pclose or fclose on fp1 depending on whether the file name ends in ".gz"
+ int index = lib_array[libNo].curr_index - 1; // openFileInLib already advanced curr_index, so the open file is the previous entry
+ char *fname;
+ int i, j = 0; // j counts trailing spaces of fname
+ char str_sub[4];
+
+ if ( ftype == 1 ) // pick the name the current stream was opened from
+ {
+ fname = lib_array[libNo].a1_fname[index];
+ }
+ else if ( ftype == 2 )
+ {
+ fname = lib_array[libNo].q1_fname[index];
+ }
+ else if ( ftype == 3 )
+ {
+ fname = lib_array[libNo].p_fname[index];
+ }
+ else if ( ftype == 5 )
+ {
+ fname = lib_array[libNo].s_a_fname[index];
+ }
+ else if ( ftype == 6 )
+ {
+ fname = lib_array[libNo].s_q_fname[index];
+ }
+ else if ( ftype == 4 )
+ {
+ fname = lib_array[libNo].b_fname[index];
+ }
+ else
+ {
+ return; // unknown file type: nothing to close
+ }
+
+ if ( ftype == 4 )
+ {
+ samclose ( lib_array[libNo].fp3 ); //close file3; NOTE(review): fp3 is not reset to NULL afterwards
+ }
+ else
+ {
+ for ( i = strlen ( fname ) - 1; i >= 0; i-- ) // same trailing-space / suffix detection as openFile4read, so the close call matches how the stream was opened
+ {
+ if ( fname[i] == ' ' )
+ {
+ j++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ for ( i = 0; i < 3; i++ )
+ {
+ str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; // NOTE(review): computed before the length test below; the index wraps for names with fewer than 3 non-space characters
+ }
+
+ str_sub[3] = '\0';
+
+ if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 )
+ {
+ pclose ( lib_array[libNo].fp1 ); // ".gz" inputs were opened with popen
+ }
+ else
+ {
+ fclose ( lib_array[libNo].fp1 ); // NOTE(review): fp1 is left pointing at the closed stream in both branches
+ }
+ }
}
void closeFp2InLab ( int libNo )
{
- int ftype = lib_array[libNo].curr_type;
- int index = lib_array[libNo].curr_index - 1;
- char * fname;
- int i, j = 0;
- char str_sub[4];
-
- if ( ftype == 1 )
- {
- fname = lib_array[libNo].a2_fname[index];
- }
- else if ( ftype == 2 )
- {
- fname = lib_array[libNo].q2_fname[index];
- }
- else
- {
- return;
- }
-
- for ( i = strlen ( fname ) - 1; i >= 0; i-- )
- {
- if ( fname[i] == ' ' ) { j++; }
- else { break; }
- }
-
- for ( i = 0; i < 3; i++ ) { str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; }
-
- str_sub[3] = '\0';
-
- if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 )
- {
- pclose ( lib_array[libNo].fp2 );
- }
- else
- {
- fclose ( lib_array[libNo].fp2 );
- }
+ int ftype = lib_array[libNo].curr_type; // closeFp2InLab: close the second stream (fp2) of library libNo; only file types 1 and 2 have one, every other type returns without doing anything
+ int index = lib_array[libNo].curr_index - 1; // openFileInLib already advanced curr_index, so the open file is the previous entry
+ char *fname;
+ int i, j = 0; // j counts trailing spaces of fname
+ char str_sub[4];
+
+ if ( ftype == 1 )
+ {
+ fname = lib_array[libNo].a2_fname[index];
+ }
+ else if ( ftype == 2 )
+ {
+ fname = lib_array[libNo].q2_fname[index];
+ }
+ else
+ {
+ return; // no second file for this type
+ }
+
+ for ( i = strlen ( fname ) - 1; i >= 0; i-- ) // same trailing-space / suffix detection as openFile4read
+ {
+ if ( fname[i] == ' ' )
+ {
+ j++;
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ for ( i = 0; i < 3; i++ )
+ {
+ str_sub[i] = fname[strlen ( fname ) - 3 - j + i]; // NOTE(review): computed before the length test below; the index wraps for names with fewer than 3 non-space characters
+ }
+
+ str_sub[3] = '\0';
+
+ if ( strlen ( fname ) > 3 && strcmp ( str_sub, ".gz" ) == 0 )
+ {
+ pclose ( lib_array[libNo].fp2 ); // ".gz" inputs were opened with popen
+ }
+ else
+ {
+ fclose ( lib_array[libNo].fp2 ); // NOTE(review): fp2 is left pointing at the closed stream in both branches
+ }
}
-boolean readseqInLib ( char * src_seq, char * src_name, int * len_seq, char * buf, int * start, int offset, int i )
+boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, int *start, int offset, int i )
{
- if ( lib_array[i].curr_type == 1 )
- {
- if ( lib_array[i].paired == 1 )
- {
- readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
-
- if ( lib_array[i].reverse )
- { reverse2k ( src_seq, *len_seq ); }
-
- lib_array[i].paired = 2;
- n_solexa++;
- return 1;
- }
- else
- {
- readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
-
- if ( lib_array[i].reverse )
- { reverse2k ( src_seq, *len_seq ); }
-
- lib_array[i].paired = 1;
- n_solexa++;
- return 1; //can't fail to read a read2
- }
- }
-
- if ( lib_array[i].curr_type == 2 )
- {
- if ( lib_array[i].paired == 1 )
- {
- readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
-
- if ( lib_array[i].reverse )
- { reverse2k ( src_seq, *len_seq ); }
-
- lib_array[i].paired = 2;
- n_solexa++;
- return 1;
- }
- else
- {
- readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
-
- if ( lib_array[i].reverse )
- { reverse2k ( src_seq, *len_seq ); }
-
- lib_array[i].paired = 1;
- n_solexa++;
- return 1; //can't fail to read a read2
- }
- }
-
- if ( lib_array[i].curr_type == 6 )
- { readseqfq ( src_seq, src_name, len_seq, buf, start, offset ); }
- else
- {
- readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
- }
-
- /*
- int t;
- for(t=0;t<*len_seq;t++)
- printf("%d",src_seq[t]);
- printf("\n");
- */
- if ( lib_array[i].reverse )
- { reverse2k ( src_seq, *len_seq ); }
-
- n_solexa++;
- return 1;
+ if ( lib_array[i].curr_type == 1 ) // readseqInLib: decode one read for library i from the in-memory buffer buf (not from a file), reverse-complement it if the library asks for that, bump n_solexa, and always return 1
+ {
+ if ( lib_array[i].paired == 1 ) // the two branches differ only in the value written back to paired (1 -> 2 -> 1 ...)
+ {
+ readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 2; // next call handles the second read of the pair
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 1; // pair complete: next call starts a new pair
+ n_solexa++;
+ return 1; //can't fail to read a read2
+ }
+ }
+
+ if ( lib_array[i].curr_type == 2 ) // same alternation as type 1, but decoded with readseqfq
+ {
+ if ( lib_array[i].paired == 1 )
+ {
+ readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 2;
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 1;
+ n_solexa++;
+ return 1; //can't fail to read a read2
+ }
+ }
+
+ if ( lib_array[i].curr_type == 6 ) // remaining types are unpaired: type 6 uses readseqfq, everything else readseqInBuf
+ {
+ readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+ }
+ else
+ {
+ readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+ }
+
+ /*
+ int t;
+ for(t=0;t<*len_seq;t++)
+ printf("%d",src_seq[t]);
+ printf("\n");
+ */
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ n_solexa++;
+ return 1;
}
-boolean read1seqInLib ( char * src_seq, char * src_name, int * len_seq, int * libNo, boolean pair, unsigned char asm_ctg, int * type )
+boolean read1seqInLib ( char *src_seq, char *src_name, int *len_seq, int *libNo, boolean pair, unsigned char asm_ctg, int *type )
{
- int i = *libNo;
- int prevLib = i;
-
- if ( ( ( lib_array[i].curr_type != 4 ) && !lib_array[i].fp1 ) // file1 does not exist
- || ( ( lib_array[i].curr_type == 4 ) && ( lib_array[i].fp3 == NULL ) ) //file3 does not exist
- || ( ( lib_array[i].curr_type == 4 ) && readstate < 0 ) //file3 reaches end
- || ( ( lib_array[i].curr_type == 1 ) && feof ( lib_array[i].fp1 ) && feof ( lib_array[i].fp2 ) ) //f1 && f2 reach end
- || ( ( lib_array[i].curr_type == 2 ) && ( feof ( lib_array[i].fp1 ) || feof ( lib_array[i].fp2 ) )) //f1||f2 reaches end
- || ( ( lib_array[i].curr_type != 1 && lib_array[i].curr_type != 2 ) && ( lib_array[i].curr_type != 4 ) && feof ( lib_array[i].fp1 ) ) ) // file1 reaches end and not type1 type2 and not type6
- {
- if ( lib_array[i].curr_type == 4 )
- {
- if ( lib_array[i].fp3 && readstate < 0 ) // file3 reaches end
- { closeFp1InLab ( i ); }
-
- readstate = 0;
- }
- else
- {
- if ( lib_array[i].fp1 && feof ( lib_array[i].fp1 ) )
- {
- closeFp1InLab ( i );
-
- if ( lib_array[i].fp2 )
- {
- closeFp2InLab ( i );
- }
- }
- else if ( lib_array[i].fp2 && feof ( lib_array[i].fp2 ) )
- {
- closeFp2InLab ( i );
-
- if ( lib_array[i].fp1 )
- {
- closeFp1InLab ( i );
- }
- }
- }
-
- *libNo = nextValidIndex ( i, pair, asm_ctg );
- i = *libNo;
-
- if ( lib_array[i].rd_len_cutoff > 0 )
- { maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ? lib_array[i].rd_len_cutoff : maxReadLen4all; }
- else
- {
- maxReadLen = maxReadLen4all;
- }
-
- //record insert size info
- if ( pair && i != prevLib )
- {
- if ( readNumBack < n_solexa )
- {
- pes[gradsCounter].PE_bound = n_solexa;
- pes[gradsCounter].rank = lib_array[prevLib].rank;
- pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
- pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
- readNumBack = n_solexa;
- }
- }
-
- if ( i >= num_libs )
- {
- return 0;
- }
-
- openFileInLib ( i );
-
- if ( lib_array[i].curr_type == 1 )
- {
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, -1 );
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp2, -1 );
- }
- else if ( lib_array[i].curr_type == 3 || lib_array[i].curr_type == 5 )
- {
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, -1 );
- }
- }
-
- if ( lib_array[i].curr_type == 1 )
- {
- if ( lib_array[i].paired == 1 )
- {
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, 1 );
-
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- lib_array[i].paired = 2;
-
- if ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) )
- {
- n_solexa++;
- return 1;
- }
- else
- {
- return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type );
- }
- }
- else
- {
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp2, 1 );
-
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- lib_array[i].paired = 1;
- n_solexa++;
- return 1; //can't fail to read a read2
- }
- }
-
- if ( lib_array[i].curr_type == 2 )
- {
- if ( lib_array[i].paired == 1 )
- {
- read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp1 );
-
- /*
- if(*len_seq>0){
- for(j=0;j<*len_seq;j++)
- printf("%c",int2base(src_seq[j]));
- printf("\n");
- }
- */
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- lib_array[i].paired = 2;
-
- if ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) )
- {
- n_solexa++;
- return 1;
- }
- else
- {
- return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type );
- }
- }
- else
- {
- read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp2 );
-
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- lib_array[i].paired = 1;
- n_solexa++;
- return 1; //can't fail to read a read2
- }
- }
-
- if ( lib_array[i].curr_type == 6 )
- {
- read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp1 );
- }
- else if ( lib_array[i].curr_type == 4 )
- {
- read1seqbam ( src_seq, src_name, len_seq, lib_array[i].fp3, type, lib_array[i].asm_flag );
- }
- else
- {
- readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, 1 );
- }
-
- /*
- int t;
- for(t=0;t<*len_seq;t++)
- printf("%d",src_seq[t]);
- printf("\n");
- */
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- if ( lib_array[i].curr_type != 4 && ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) ) )
- {
- n_solexa++;
- return 1;
- }
- else if ( lib_array[i].curr_type == 4 && ( *len_seq > 0 || readstate >= 0 ) )
- {
- n_solexa++;
- return 1;
- }
- else
- {
- return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type );
- }
+ int i = *libNo; // read1seqInLib: fetch the next read from library *libNo, moving on to the next file / file type / library when the current input is missing or exhausted (updating *libNo); returns 1 with the read in src_seq/src_name/len_seq, or 0 once no library has input left
+ int prevLib = i; // remembered so a library change can be detected below
+
+ if ( ( ( lib_array[i].curr_type != 4 ) && !lib_array[i].fp1 ) // file1 does not exist
+ || ( ( lib_array[i].curr_type == 4 ) && ( lib_array[i].fp3 == NULL ) ) //file3 does not exist
+ || ( ( lib_array[i].curr_type == 4 ) && readstate < 0 ) //file3 reaches end
+ || ( ( lib_array[i].curr_type == 1 ) && feof ( lib_array[i].fp1 ) && feof ( lib_array[i].fp2 ) ) //f1 && f2 reach end
+ || ( ( lib_array[i].curr_type == 2 ) && ( feof ( lib_array[i].fp1 ) || feof ( lib_array[i].fp2 ) )) //f1||f2 reaches end
+ || ( ( lib_array[i].curr_type != 1 && lib_array[i].curr_type != 2 ) && ( lib_array[i].curr_type != 4 ) && feof ( lib_array[i].fp1 ) ) ) // file1 reaches end and the type is none of 1, 2 or 4
+ {
+ if ( lib_array[i].curr_type == 4 )
+ {
+ if ( lib_array[i].fp3 && readstate < 0 ) // file3 reaches end
+ {
+ closeFp1InLab ( i ); // for type 4 this closes fp3
+ }
+
+ readstate = 0; // reset the global BAM read state for the next file
+ }
+ else
+ {
+ if ( lib_array[i].fp1 && feof ( lib_array[i].fp1 ) ) // whichever stream hit EOF first, close both
+ {
+ closeFp1InLab ( i );
+
+ if ( lib_array[i].fp2 )
+ {
+ closeFp2InLab ( i ); // no-op unless the type is 1 or 2
+ }
+ }
+ else if ( lib_array[i].fp2 && feof ( lib_array[i].fp2 ) )
+ {
+ closeFp2InLab ( i );
+
+ if ( lib_array[i].fp1 )
+ {
+ closeFp1InLab ( i );
+ }
+ }
+ }
+
+ *libNo = nextValidIndex ( i, pair, asm_ctg ); // may stay on the same library (next file or type) or move to a later one
+ i = *libNo;
+
+ if ( lib_array[i].rd_len_cutoff > 0 ) // NOTE(review): i may already be >= num_libs here (the check is further down), in which case this reads past the libraries nextValidIndex considered valid
+ {
+ maxReadLen = lib_array[i].rd_len_cutoff < maxReadLen4all ? lib_array[i].rd_len_cutoff : maxReadLen4all; // per-library cutoff, capped by the global limit
+ }
+ else
+ {
+ maxReadLen = maxReadLen4all;
+ }
+
+ //record insert size info
+ if ( pair && i != prevLib ) // only when reading pairs and the library actually changed
+ {
+ if ( readNumBack < n_solexa ) // and only if reads were counted since the last record
+ {
+ pes[gradsCounter].PE_bound = n_solexa; // running read count at which the previous library ends
+ pes[gradsCounter].rank = lib_array[prevLib].rank;
+ pes[gradsCounter].pair_num_cut = lib_array[prevLib].pair_num_cut;
+ pes[gradsCounter++].insertS = lib_array[prevLib].avg_ins;
+ readNumBack = n_solexa;
+ }
+ }
+
+ if ( i >= num_libs ) // nothing left to read in any library
+ {
+ return 0;
+ }
+
+ openFileInLib ( i );
+
+ if ( lib_array[i].curr_type == 1 ) // freshly opened readseq1by1 inputs get one call with flag -1 first; presumably this primes the reader - confirm against readseq1by1
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, -1 );
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp2, -1 );
+ }
+ else if ( lib_array[i].curr_type == 3 || lib_array[i].curr_type == 5 )
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, -1 );
+ }
+ }
+
+ if ( lib_array[i].curr_type == 1 ) // type 1: alternate between fp1 and fp2, tracked by paired
+ {
+ if ( lib_array[i].paired == 1 )
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, 1 );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 2; // next call reads the mate from fp2
+
+ if ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) )
+ {
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type ); // empty read at EOF: re-enter so the switch-over logic above picks the next input
+ }
+ }
+ else
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp2, 1 );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 1;
+ n_solexa++;
+ return 1; //can't fail to read a read2
+ }
+ }
+
+ if ( lib_array[i].curr_type == 2 ) // type 2: same alternation as type 1, read with read1seqfq
+ {
+ if ( lib_array[i].paired == 1 )
+ {
+ read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp1 );
+
+ /*
+ if(*len_seq>0){
+ for(j=0;j<*len_seq;j++)
+ printf("%c",int2base(src_seq[j]));
+ printf("\n");
+ }
+ */
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 2;
+
+ if ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) )
+ {
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type ); // empty read at EOF: re-enter to switch input
+ }
+ }
+ else
+ {
+ read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp2 );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ lib_array[i].paired = 1;
+ n_solexa++;
+ return 1; //can't fail to read a read2
+ }
+ }
+
+ if ( lib_array[i].curr_type == 6 ) // unpaired inputs: pick the reader by file type
+ {
+ read1seqfq ( src_seq, src_name, len_seq, lib_array[i].fp1 );
+ }
+ else if ( lib_array[i].curr_type == 4 )
+ {
+ read1seqbam ( src_seq, src_name, len_seq, lib_array[i].fp3, type, lib_array[i].asm_flag ); // the only reader that receives type
+ }
+ else
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, lib_array[i].fp1, 1 );
+ }
+
+ /*
+ int t;
+ for(t=0;t<*len_seq;t++)
+ printf("%d",src_seq[t]);
+ printf("\n");
+ */
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ if ( lib_array[i].curr_type != 4 && ( *len_seq > 0 || !feof ( lib_array[i].fp1 ) ) ) // stream inputs: success unless nothing was read and fp1 is at EOF
+ {
+ n_solexa++;
+ return 1;
+ }
+ else if ( lib_array[i].curr_type == 4 && ( *len_seq > 0 || readstate >= 0 ) ) // BAM input: success unless nothing was read and readstate went negative
+ {
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ return read1seqInLib ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type ); // current input exhausted: re-enter to move to the next one
+ }
}
-boolean read1seqInLibBam ( char * src_seq, char * src_name, int * len_seq, int * libNo, boolean pair, unsigned char asm_ctg, int * type )
+boolean read1seqInLibBam ( char *src_seq, char *src_name, int *len_seq, int *libNo, boolean pair, unsigned char asm_ctg, int *type )
{
- int i = *libNo;
-
- if ( lib_array[i].fp3 == NULL )
- {
- fprintf ( stderr, "Empty file handle.\n" );
- return 0;
- }
-
- if ( lib_array[i].fp3 && readstate < 0 ) // file3 reaches end
- {
- closeFp1InLab ( i );
- readstate = 0;
- return 0;
- }
-
- read1seqbam ( src_seq, src_name, len_seq, lib_array[i].fp3, type, lib_array[i].asm_flag );
-
- if ( lib_array[i].reverse )
- {
- reverse2k ( src_seq, *len_seq );
- }
-
- if ( *len_seq > 0 || readstate >= 0 )
- {
- n_solexa++;
- return 1;
- }
- else
- { return read1seqInLibBam ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type ); }
+ int i = *libNo; // read1seqInLibBam: read the next record from the already opened BAM handle (fp3) of library *libNo; returns 1 on success, 0 when the handle is missing or the file has ended. It never opens another file; pair and asm_ctg are only passed along to the recursive call.
+
+ if ( lib_array[i].fp3 == NULL )
+ {
+ fprintf ( stderr, "Empty file handle.\n" );
+ return 0;
+ }
+
+ if ( lib_array[i].fp3 && readstate < 0 ) // file3 reaches end
+ {
+ closeFp1InLab ( i ); // closes fp3 when the library's curr_type is 4
+ readstate = 0; // reset the global BAM read state for the next file
+ return 0;
+ }
+
+ read1seqbam ( src_seq, src_name, len_seq, lib_array[i].fp3, type, lib_array[i].asm_flag );
+
+ if ( lib_array[i].reverse )
+ {
+ reverse2k ( src_seq, *len_seq );
+ }
+
+ if ( *len_seq > 0 || readstate >= 0 )
+ {
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ return read1seqInLibBam ( src_seq, src_name, len_seq, libNo, pair, asm_ctg, type ); // nothing read and readstate < 0: the re-entry takes the end-of-file branch above, closes the file and returns 0
+ }
}
-FILE * file = NULL;
-boolean read1seqInLibpos ( char * src_seq, char * src_name, int * len_seq, // FILE *file,
- int * file_No, int file_num, char ** fileName, int * fileType, int * maxLen, long * pos_seq )
+FILE *file = NULL;
+boolean read1seqInLibpos ( char *src_seq, char *src_name, int *len_seq, // FILE *file,
+ int *file_No, int file_num, char **fileName, int *fileType, int *maxLen, long *pos_seq )
{
- if ( *file_No < 0 || feof ( file ) )
- {
- ( *file_No ) ++;
-
- if ( *file_No >= file_num )
- { return 0; }
-
- maxReadLen = maxLen[*file_No];
-
- if ( file != NULL )
- {
- fclose ( file );
- }
-
- file = openFile4read ( fileName[*file_No] );
-
- if ( file != NULL )
- { fprintf ( stderr, "Import reads from file:\n %s\n", fileName[*file_No] ); }
-
- if ( fileType[*file_No] == 1 || fileType[*file_No] == 3 )
- {
- readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
- // *pos_seq = curr_pos;
- }
- }
-
- // multi=1;
- *len_seq = 0;
-
- if ( fileType[*file_No] == 1 )
- {
- // *pos_seq = curr_pos;
- readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
- }
- else if ( fileType[*file_No] == 2 )
- {
- read1seqfq ( src_seq, src_name, len_seq, file );
- // *pos_seq = curr_pos;
- }
- else if ( fileType[*file_No] == 3 )
- {
- // *pos_seq = curr_pos;
- readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
- }
- else if ( fileType[*file_No] == 4 )
- {
- read1seqfq ( src_seq, src_name, len_seq, file );
- // *pos_seq = curr_pos;
- }
- else
- { return 0; }
-
- // multi=0;
- if ( *len_seq > 0 || feof ( file ) )
- {
- n_solexa++;
- return 1;
- }
- else
- { return read1seqInLibpos ( src_seq, src_name, len_seq, file_No, file_num, fileName, fileType, maxLen, pos_seq ); }
+ if ( *file_No < 0 || feof ( file ) ) // read1seqInLibpos: read the next sequence from a flat list of files (fileName / fileType / maxLen, file_num entries) through the file-level stream `file`, advancing *file_No when the current file is finished; a negative *file_No means nothing has been opened yet. pos_seq is never written (every assignment to it is commented out).
+ {
+ ( *file_No ) ++;
+
+ if ( *file_No >= file_num ) // ran out of files
+ {
+ return 0;
+ }
+
+ maxReadLen = maxLen[*file_No]; // per-file read length limit, stored in the global maxReadLen
+
+ if ( file != NULL )
+ {
+ fclose ( file ); // NOTE(review): openFile4read returns a popen stream for ".gz" names, which should be closed with pclose rather than fclose
+ }
+
+ file = openFile4read ( fileName[*file_No] );
+
+ if ( file != NULL ) // NOTE(review): a NULL stream only suppresses the log line; it is still passed to the readers below
+ {
+ fprintf ( stderr, "Import reads from file:\n %s\n", fileName[*file_No] );
+ }
+
+ if ( fileType[*file_No] == 1 || fileType[*file_No] == 3 ) // file types read with readseq1by1 get one extra call right after opening; its result is discarded by the reset of *len_seq below
+ {
+ readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
+ // *pos_seq = curr_pos;
+ }
+ }
+
+ // multi=1;
+ *len_seq = 0; // start from an empty result before the real read
+
+ if ( fileType[*file_No] == 1 ) // types 1 and 3 use readseq1by1, types 2 and 4 use read1seqfq
+ {
+ // *pos_seq = curr_pos;
+ readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
+ }
+ else if ( fileType[*file_No] == 2 )
+ {
+ read1seqfq ( src_seq, src_name, len_seq, file );
+ // *pos_seq = curr_pos;
+ }
+ else if ( fileType[*file_No] == 3 )
+ {
+ // *pos_seq = curr_pos;
+ readseq1by1 ( src_seq, src_name, len_seq, file, 1 );
+ }
+ else if ( fileType[*file_No] == 4 )
+ {
+ read1seqfq ( src_seq, src_name, len_seq, file );
+ // *pos_seq = curr_pos;
+ }
+ else
+ {
+ return 0; // unknown file type
+ }
+
+ // multi=0;
+ if ( *len_seq > 0 || feof ( file ) ) // NOTE(review): this also reports success for an empty read at EOF; the sibling readers test !feof here - confirm which is intended
+ {
+ n_solexa++;
+ return 1;
+ }
+ else
+ {
+ return read1seqInLibpos ( src_seq, src_name, len_seq, file_No, file_num, fileName, fileType, maxLen, pos_seq ); // empty read but not at EOF: try again
+ }
}
diff --git a/standardPregraph/scaffold.c b/standardPregraph/scaffold.c
index c712bae..d29b463 100644
--- a/standardPregraph/scaffold.c
+++ b/standardPregraph/scaffold.c
@@ -1,7 +1,7 @@
/*
* scaffold.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -26,7 +26,7 @@
#include "extfunc.h"
#include "extvab.h"
-static void initenv ( int argc, char ** argv );
+static void initenv ( int argc, char **argv );
static void display_scaff_usage ();
static boolean LINK, SCAFF;
@@ -56,178 +56,193 @@ Output:
Return:
0 if exit normally.
*************************************************/
-int call_scaffold ( int argc, char ** argv )
+int call_scaffold ( int argc, char **argv )
{
- time_t start_t, stop_t, time_bef, time_aft;
- fprintf ( stderr, "\n********************\n" );
- fprintf ( stderr, "Scaff\n" );
- fprintf ( stderr, "********************\n\n" );
- ctg_short = 0;
- time ( &start_t );
- initenv ( argc, argv );
- checkFiles4Scaff ( graphfile );
- loadPEgrads ( graphfile );
- time ( &time_bef );
- loadUpdatedEdges ( graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on loading updated edges: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
-
- if ( !SCAFF )
- {
- time ( &time_bef );
- PE2Links ( graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on loading paired-end reads information: %ds.\n", ( int ) ( time_aft - time_bef ) );
- time ( &time_bef );
- fprintf ( stderr, "\n*****************************************************\nStart to construct scaffolds.\n" );
- Links2Scaf ( graphfile );
- time ( &time_aft );
- fprintf ( stderr, "Time spent on constructing scaffolds: %ds.\n", ( int ) ( time_aft - time_bef ) );
- scaffolding ( 100, graphfile );
- }
-
- prlReadsCloseGap ( graphfile );
- ScafStat ( 100, graphfile );
- free_pe_mem ();
-
- if ( index_array )
- {
- free ( ( void * ) index_array );
- }
-
- freeContig_array ();
- destroyPreArcMem ();
- destroyConnectMem ();
- deleteCntLookupTable ();
- time ( &stop_t );
- fprintf ( stderr, "\nOverall time spent on constructing scaffolds: %dm.\n", ( int ) ( stop_t - start_t ) / 60 );
- return 0;
+ time_t start_t, stop_t, time_bef, time_aft;
+ fprintf ( stderr, "\n********************\n" );
+ fprintf ( stderr, "Scaff\n" );
+ fprintf ( stderr, "********************\n\n" );
+ ctg_short = 0;
+ time ( &start_t );
+ initenv ( argc, argv );
+ checkFiles4Scaff ( graphfile );
+ loadPEgrads ( graphfile );
+ time ( &time_bef );
+ loadUpdatedEdges ( graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on loading updated edges: %ds.\n\n", ( int ) ( time_aft - time_bef ) );
+
+ if ( !SCAFF )
+ {
+ time ( &time_bef );
+ PE2Links ( graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on loading paired-end reads information: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ time ( &time_bef );
+ fprintf ( stderr, "\n*****************************************************\nStart to construct scaffolds.\n" );
+ Links2Scaf ( graphfile );
+ time ( &time_aft );
+ fprintf ( stderr, "Time spent on constructing scaffolds: %ds.\n", ( int ) ( time_aft - time_bef ) );
+ scaffolding ( 100, graphfile );
+ }
+
+ prlReadsCloseGap ( graphfile );
+ ScafStat ( 100, graphfile );
+ free_pe_mem ();
+
+ if ( index_array )
+ {
+ free ( ( void * ) index_array );
+ }
+
+ freeContig_array ();
+ destroyPreArcMem ();
+ destroyConnectMem ();
+ deleteCntLookupTable ();
+ time ( &stop_t );
+ fprintf ( stderr, "\nOverall time spent on constructing scaffolds: %dm.\n", ( int ) ( stop_t - start_t ) / 60 );
+ return 0;
}
/*****************************************************************************
* Parse command line switches
*****************************************************************************/
-void initenv ( int argc, char ** argv )
+void initenv ( int argc, char **argv )
{
- int copt;
- int inpseq;
- extern char * optarg;
- char temp[256];
- inpseq = 0;
- LINK = 0;
- SCAFF = 0;
- optind = 1;
- fprintf ( stderr, "Parameters: scaff " );
-
- while ( ( copt = getopt ( argc, argv, "g:L:p:G:N:c:C:b:B:FzuSVw" ) ) != EOF )
- {
- switch ( copt )
- {
- case 'g':
- fprintf ( stderr, "-g %s ", optarg );
- inGraph = 1;
- sscanf ( optarg, "%s", graphfile );
- break;
- case 'G':
- fprintf ( stderr, "-G %s ", optarg );
- sscanf ( optarg, "%s", temp );
- GLDiff = atoi ( temp );
- break;
- case 'L':
- fprintf ( stderr, "-L %s ", optarg );
- sscanf ( optarg, "%s", temp );
- ctg_short = atoi ( temp );
- break;
- case 'N':
- fprintf ( stderr, "-N %s ", optarg );
- sscanf ( optarg, "%s", temp );
- known_genome_size = atoi ( temp );
- break;
- case 'F':
- fillGap = 1;
- fprintf ( stderr, "-F " );
- break;
- case 'u':
- maskRep = 0;
- fprintf ( stderr, "-u " );
- break;
- case 'S':
- SCAFF = 1;
- fprintf ( stderr, "-S " );
- break;
- case 'V':
- visual = 1;
- fprintf ( stderr, "-V " );
- break;
- case 'w':
- score_mask = 0;
- fprintf ( stderr, "-w " );
- break;
- case 'p':
- fprintf ( stderr, "-p %s ", optarg );
- sscanf ( optarg, "%s", temp );
- thrd_num = atoi ( temp );
- break;
- case 'c':
- fprintf ( stderr, "-c %s ", optarg );
- sscanf ( optarg, "%s", temp );
- cvg_low = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
- break;
- case 'C':
- fprintf ( stderr, "-C %s ", optarg );
- sscanf ( optarg, "%s", temp );
- cvg_high = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
- break;
- case 'b':
- fprintf ( stderr, "-b %s ", optarg );
- sscanf ( optarg, "%s", temp );
- ins_var_idx = atof ( temp ) > 1.0 ? atof ( temp ) : 0.0;
- break;
- case 'B':
- fprintf ( stderr, "-B %s ", optarg );
- sscanf ( optarg, "%s", temp );
- cvg4SNP = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
- break;
- case 'z':
- COMPATIBLE_MODE = 1;
- fprintf ( stderr, "-m " );
- break;
- default:
-
- if ( inGraph == 0 )
- {
- display_scaff_usage ();
- exit ( -1 );
- }
- }
- }
-
- fprintf ( stderr, "\n\n" );
-
- if ( inGraph == 0 )
- {
- display_scaff_usage ();
- exit ( -1 );
- }
+ int copt;
+ int inpseq;
+ extern char *optarg;
+ char temp[256];
+ inpseq = 0;
+ LINK = 0;
+ SCAFF = 0;
+ optind = 1;
+ fprintf ( stderr, "Parameters: scaff " );
+
+ while ( ( copt = getopt ( argc, argv, "g:L:p:G:N:c:C:b:B:FzuSVw" ) ) != EOF )
+ {
+ switch ( copt )
+ {
+ case 'g':
+ fprintf ( stderr, "-g %s ", optarg );
+ inGraph = 1;
+ sscanf ( optarg, "%s", graphfile );
+ break;
+
+ case 'G':
+ fprintf ( stderr, "-G %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ GLDiff = atoi ( temp );
+ break;
+
+ case 'L':
+ fprintf ( stderr, "-L %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ ctg_short = atoi ( temp );
+ break;
+
+ case 'N':
+ fprintf ( stderr, "-N %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ known_genome_size = atoi ( temp );
+ break;
+
+ case 'F':
+ fillGap = 1;
+ fprintf ( stderr, "-F " );
+ break;
+
+ case 'u':
+ maskRep = 0;
+ fprintf ( stderr, "-u " );
+ break;
+
+ case 'S':
+ SCAFF = 1;
+ fprintf ( stderr, "-S " );
+ break;
+
+ case 'V':
+ visual = 1;
+ fprintf ( stderr, "-V " );
+ break;
+
+ case 'w':
+ score_mask = 0;
+ fprintf ( stderr, "-w " );
+ break;
+
+ case 'p':
+ fprintf ( stderr, "-p %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ thrd_num = atoi ( temp );
+ break;
+
+ case 'c':
+ fprintf ( stderr, "-c %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ cvg_low = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
+ break;
+
+ case 'C':
+ fprintf ( stderr, "-C %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ cvg_high = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
+ break;
+
+ case 'b':
+ fprintf ( stderr, "-b %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ ins_var_idx = atof ( temp ) > 1.0 ? atof ( temp ) : 0.0;
+ break;
+
+ case 'B':
+ fprintf ( stderr, "-B %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ cvg4SNP = atof ( temp ) > 0 ? atof ( temp ) : 0.0;
+ break;
+
+ case 'z':
+ COMPATIBLE_MODE = 1;
+ fprintf ( stderr, "-m " );
+ break;
+
+ default:
+
+ if ( inGraph == 0 )
+ {
+ display_scaff_usage ();
+ exit ( -1 );
+ }
+ }
+ }
+
+ fprintf ( stderr, "\n\n" );
+
+ if ( inGraph == 0 )
+ {
+ display_scaff_usage ();
+ exit ( -1 );
+ }
}
static void display_scaff_usage ()
{
- fprintf ( stderr, "\nscaff -g inputGraph [-F -z -u -S -w] [-G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize -p n_cpu]\n" );
- fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
- fprintf ( stderr, " -F (optional) fill gaps in scaffold, [No]\n" );
- fprintf ( stderr, " -z (optional) use compatible mode to build scaffold with contig produced by Version 1.05, [No]\n" );
- fprintf ( stderr, " -u (optional) un-mask contigs with high/low coverage before scaffolding, [mask]\n" );
- fprintf ( stderr, " -S (optional) if scaffold structure exists, do gapfilling only(-F), [NO]\n" );
- fprintf ( stderr, " -w (optional) keep contigs weakly connected to other contigs in scaffold, [NO]\n" );
- fprintf ( stderr, " -V (optional) output information for Hawkeye to visualize the assembly, [NO]\n" );
- fprintf ( stderr, " -G <int> gapLenDiff: allowed length difference between estimated and filled gap, [50]\n" );
- fprintf ( stderr, " -L <int> minContigLen: shortest contig for scaffolding, [K+2]\n" );
- fprintf ( stderr, " -c <float> minContigCvg: minimum contig coverage (c*avgCvg), contigs shorter than 100bp with coverage smaller than c*avgCvg will be masked before scaffolding unless -u is set, [0.1]\n" );
- fprintf ( stderr, " -C <float> maxContigCvg: maximum contig coverage (C*avgCvg), contigs with coverage larger than C*avgCvg or contigs shorter than 100bp with coverage larger than 0.8*C*avgCvg will be masked before scaffolding unless -u is set, [2]\n" );
- fprintf ( stderr, " -b <float> insertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n" );
- fprintf ( stderr, " -B <float> bubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n" );
- fprintf ( stderr, " -N <int> genomeSize: genome size for statistics, [0]\n" );
- fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
+ fprintf ( stderr, "\nscaff -g inputGraph [-F -z -u -S -w] [-G gapLenDiff -L minContigLen -c minContigCvg -C maxContigCvg -b insertSizeUpperBound -B bubbleCoverage -N genomeSize -p n_cpu]\n" );
+ fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
+ fprintf ( stderr, " -F (optional) fill gaps in scaffold, [No]\n" );
+ fprintf ( stderr, " -z (optional) use compatible mode to build scaffold with contig produced by Version 1.05, [No]\n" );
+ fprintf ( stderr, " -u (optional) un-mask contigs with high/low coverage before scaffolding, [mask]\n" );
+ fprintf ( stderr, " -S (optional) if scaffold structure exists, do gapfilling only(-F), [NO]\n" );
+ fprintf ( stderr, " -w (optional) keep contigs weakly connected to other contigs in scaffold, [NO]\n" );
+ fprintf ( stderr, " -V (optional) output information for Hawkeye to visualize the assembly, [NO]\n" );
+ fprintf ( stderr, " -G <int> gapLenDiff: allowed length difference between estimated and filled gap, [50]\n" );
+ fprintf ( stderr, " -L <int> minContigLen: shortest contig for scaffolding, [K+2]\n" );
+ fprintf ( stderr, " -c <float> minContigCvg: minimum contig coverage (c*avgCvg), contigs shorter than 100bp with coverage smaller than c*avgCvg will be masked before scaffolding unless -u is set, [0.1]\n" );
+ fprintf ( stderr, " -C <float> maxContigCvg: maximum contig coverage (C*avgCvg), contigs with coverage larger than C*avgCvg or contigs shorter than 100bp with coverage larger than 0.8*C*avgCvg will be masked before scaffolding unless -u is set, [2]\n" );
+ fprintf ( stderr, " -b <float> insertSizeUpperBound: (b*avg_ins) will be used as upper bound of insert size for large insert size ( > 1000) when handling pair-end connections between contigs if b is set to larger than 1, [1.5]\n" );
+ fprintf ( stderr, " -B <float> bubbleCoverage: remove contig with lower cvoerage in bubble structure if both contigs' coverage are smaller than bubbleCoverage*avgCvg, [0.6]\n" );
+ fprintf ( stderr, " -N <int> genomeSize: genome size for statistics, [0]\n" );
+ fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
}
diff --git a/standardPregraph/searchPath.c b/standardPregraph/searchPath.c
index 72981d2..908efca 100644
--- a/standardPregraph/searchPath.c
+++ b/standardPregraph/searchPath.c
@@ -1,7 +1,7 @@
/*
* searchPath.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -33,221 +33,221 @@ static int trace_limit = 5000; //the times function is called in a search
start from one contig, end with another
path length includes the length of the last contig
*/
-void traceAlongMaskedCnt ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int * num_route )
+void traceAlongMaskedCnt ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route )
{
- num_trace++;
-
- if ( num_trace > trace_limit || *num_route >= max_n_routes )
- {
- return;
- }
-
- unsigned int * array;
- int num, i, length;
- CONNECT * ite_cnt;
-
- if ( index > 0 ) // there're at most max_steps edges stored in this array including the destination edge
- {
- length = len + contig_array[currE].length;
- }
- else
- {
- length = 0;
- }
-
- if ( index > max_steps || length > max )
- {
- return;
- } // this is the only situation we stop
-
- if ( index > 0 ) // there're at most max_steps edges stored in this array including the destination edge
- {
- so_far[index - 1] = currE;
- }
-
- if ( currE == destE && index == 0 )
- {
- fprintf ( stderr, "The traceAlongMaskedCnt: start and destination are the same.\n" );
- return;
- }
-
- if ( currE == destE && length >= min && length <= max )
- {
- num = *num_route;
- array = found_routes[num];
-
- for ( i = 0; i < index; i++ )
- {
- array[i] = so_far[i];
- }
-
- if ( index < max_steps )
- {
- array[index] = 0;
- } //indicate the end of the route
-
- *num_route = ++num;
- } // one route is extrated, but we don't terminate searching
-
- ite_cnt = contig_array[currE].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( !ite_cnt->mask || ite_cnt->deleted )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- traceAlongMaskedCnt ( destE, ite_cnt->contigID, max_steps, min, max, index + 1, length + ite_cnt->gapLen, num_route );
- ite_cnt = ite_cnt->next;
- }
+ num_trace++;
+
+ if ( num_trace > trace_limit || *num_route >= max_n_routes )
+ {
+ return;
+ }
+
+ unsigned int *array;
+ int num, i, length;
+ CONNECT *ite_cnt;
+
+ if ( index > 0 ) // there're at most max_steps edges stored in this array including the destination edge
+ {
+ length = len + contig_array[currE].length;
+ }
+ else
+ {
+ length = 0;
+ }
+
+ if ( index > max_steps || length > max )
+ {
+ return;
+ } // this is the only situation we stop
+
+ if ( index > 0 ) // there're at most max_steps edges stored in this array including the destination edge
+ {
+ so_far[index - 1] = currE;
+ }
+
+ if ( currE == destE && index == 0 )
+ {
+ fprintf ( stderr, "The traceAlongMaskedCnt: start and destination are the same.\n" );
+ return;
+ }
+
+ if ( currE == destE && length >= min && length <= max )
+ {
+ num = *num_route;
+ array = found_routes[num];
+
+ for ( i = 0; i < index; i++ )
+ {
+ array[i] = so_far[i];
+ }
+
+ if ( index < max_steps )
+ {
+ array[index] = 0;
+ } //indicate the end of the route
+
+ *num_route = ++num;
+ } // one route is extrated, but we don't terminate searching
+
+ ite_cnt = contig_array[currE].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( !ite_cnt->mask || ite_cnt->deleted )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ traceAlongMaskedCnt ( destE, ite_cnt->contigID, max_steps, min, max, index + 1, length + ite_cnt->gapLen, num_route );
+ ite_cnt = ite_cnt->next;
+ }
}
// search connection paths from one connect to a contig
// path length includes the length of the last contig
-void traceAlongConnect ( unsigned int destE, CONNECT * currCNT, int max_steps, int min, int max, int index, int len, int * num_route )
+void traceAlongConnect ( unsigned int destE, CONNECT *currCNT, int max_steps, int min, int max, int index, int len, int *num_route )
{
- num_trace++;
-
- if ( num_trace > trace_limit || *num_route >= max_n_routes )
- {
- return;
- }
-
- unsigned int * array, currE;
- int num, i, length;
- CONNECT * ite_cnt;
- currE = currCNT->contigID;
- length = len + currCNT->gapLen;
- length += contig_array[currE].length;
-
- if ( index > max_steps || length > max )
- {
- return;
- } // this is the only situation we stop
-
- /*
- if(globalFlag)
- printf("B: step %d, ctg %d, length %d\n",index,currCNT->contigID,length);
- */
- if ( currE == destE && index == 1 )
- {
- fprintf ( stderr, "The traceAlongConnect: start and destination are the same.\n" );
- return;
- }
-
- so_far[index - 1] = currE; // there're at most max_steps edges stored in this array including the destination edge
-
- if ( currE == destE && length >= min && length <= max )
- {
- num = *num_route;
- array = found_routes[num];
-
- for ( i = 0; i < index; i++ )
- {
- array[i] = so_far[i];
- }
-
- if ( index < max_steps )
- {
- array[index] = 0;
- } //indicate the end of the route
-
- *num_route = ++num;
- } // one route is extrated, but we don't terminate searching
-
- if ( currCNT->nextInScaf )
- {
- traceAlongConnect ( destE, currCNT->nextInScaf, max_steps, min, max, index + 1, length, num_route );
- return;
- }
-
- ite_cnt = contig_array[currE].downwardConnect;
-
- while ( ite_cnt )
- {
- if ( ite_cnt->mask || ite_cnt->deleted )
- {
- ite_cnt = ite_cnt->next;
- continue;
- }
-
- traceAlongConnect ( destE, ite_cnt, max_steps, min, max, index + 1, length, num_route );
- ite_cnt = ite_cnt->next;
- }
+ num_trace++;
+
+ if ( num_trace > trace_limit || *num_route >= max_n_routes )
+ {
+ return;
+ }
+
+ unsigned int *array, currE;
+ int num, i, length;
+ CONNECT *ite_cnt;
+ currE = currCNT->contigID;
+ length = len + currCNT->gapLen;
+ length += contig_array[currE].length;
+
+ if ( index > max_steps || length > max )
+ {
+ return;
+ } // this is the only situation we stop
+
+ /*
+ if(globalFlag)
+ printf("B: step %d, ctg %d, length %d\n",index,currCNT->contigID,length);
+ */
+ if ( currE == destE && index == 1 )
+ {
+ fprintf ( stderr, "The traceAlongConnect: start and destination are the same.\n" );
+ return;
+ }
+
+ so_far[index - 1] = currE; // there're at most max_steps edges stored in this array including the destination edge
+
+ if ( currE == destE && length >= min && length <= max )
+ {
+ num = *num_route;
+ array = found_routes[num];
+
+ for ( i = 0; i < index; i++ )
+ {
+ array[i] = so_far[i];
+ }
+
+ if ( index < max_steps )
+ {
+ array[index] = 0;
+ } //indicate the end of the route
+
+ *num_route = ++num;
+ } // one route is extrated, but we don't terminate searching
+
+ if ( currCNT->nextInScaf )
+ {
+ traceAlongConnect ( destE, currCNT->nextInScaf, max_steps, min, max, index + 1, length, num_route );
+ return;
+ }
+
+ ite_cnt = contig_array[currE].downwardConnect;
+
+ while ( ite_cnt )
+ {
+ if ( ite_cnt->mask || ite_cnt->deleted )
+ {
+ ite_cnt = ite_cnt->next;
+ continue;
+ }
+
+ traceAlongConnect ( destE, ite_cnt, max_steps, min, max, index + 1, length, num_route );
+ ite_cnt = ite_cnt->next;
+ }
}
//find paths in the graph from currE to destE, its length does not include length of both end contigs
-void traceAlongArc ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int * num_route )
+void traceAlongArc ( unsigned int destE, unsigned int currE, int max_steps, int min, int max, int index, int len, int *num_route )
{
- num_trace++;
-
- if ( num_trace > trace_limit || *num_route >= max_n_routes )
- {
- return;
- }
-
- unsigned int * array, out_ed, vt;
- int num, i, pos, length;
- preARC * parc;
- pos = index;
-
- if ( pos > max_steps || len > max )
- {
- return;
- } // this is the only situation we stop
-
- if ( currE == destE && pos == 0 )
- {
- fprintf ( stderr, "The traceAlongArc: start and destination are the same.\n" );
- return;
- }
-
- if ( pos > 0 ) // pos starts with 0 for the starting edge
- {
- so_far[pos - 1] = currE;
- } // there're at most max_steps edges stored in this array including the destination edge
-
- if ( currE == destE && len >= min )
- {
- num = *num_route;
- array = found_routes[num];
-
- for ( i = 0; i < pos; i++ )
- {
- array[i] = so_far[i];
- }
-
- if ( pos < max_steps )
- {
- array[pos] = 0;
- } //indicate the end of the route
-
- *num_route = ++num;
- } // one route is extrated, but we don't terminate searching
-
- if ( pos == max_steps || len == max )
- {
- return;
- }
-
- if ( pos++ > 0 ) //not the starting edge
- {
- length = len + contig_array[currE].length;
- }
- else
- {
- length = len;
- }
-
- vt = contig_array[currE].to_vt;
- parc = contig_array[currE].arcs;
-
- while ( parc )
- {
- out_ed = parc->to_ed;
- traceAlongArc ( destE, out_ed, max_steps, min, max, pos, length, num_route );
- parc = parc->next;
- }
+ num_trace++;
+
+ if ( num_trace > trace_limit || *num_route >= max_n_routes )
+ {
+ return;
+ }
+
+ unsigned int *array, out_ed, vt;
+ int num, i, pos, length;
+ preARC *parc;
+ pos = index;
+
+ if ( pos > max_steps || len > max )
+ {
+ return;
+ } // this is the only situation we stop
+
+ if ( currE == destE && pos == 0 )
+ {
+ fprintf ( stderr, "The traceAlongArc: start and destination are the same.\n" );
+ return;
+ }
+
+ if ( pos > 0 ) // pos starts with 0 for the starting edge
+ {
+ so_far[pos - 1] = currE;
+ } // there're at most max_steps edges stored in this array including the destination edge
+
+ if ( currE == destE && len >= min )
+ {
+ num = *num_route;
+ array = found_routes[num];
+
+ for ( i = 0; i < pos; i++ )
+ {
+ array[i] = so_far[i];
+ }
+
+ if ( pos < max_steps )
+ {
+ array[pos] = 0;
+ } //indicate the end of the route
+
+ *num_route = ++num;
+ } // one route is extrated, but we don't terminate searching
+
+ if ( pos == max_steps || len == max )
+ {
+ return;
+ }
+
+ if ( pos++ > 0 ) //not the starting edge
+ {
+ length = len + contig_array[currE].length;
+ }
+ else
+ {
+ length = len;
+ }
+
+ vt = contig_array[currE].to_vt;
+ parc = contig_array[currE].arcs;
+
+ while ( parc )
+ {
+ out_ed = parc->to_ed;
+ traceAlongArc ( destE, out_ed, max_steps, min, max, pos, length, num_route );
+ parc = parc->next;
+ }
}
diff --git a/standardPregraph/seq.c b/standardPregraph/seq.c
index a24ce6e..b788a8d 100644
--- a/standardPregraph/seq.c
+++ b/standardPregraph/seq.c
@@ -1,7 +1,7 @@
/*
* seq.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -47,21 +47,21 @@ Return:
None.
*************************************************/
-void printTightString ( char * tightSeq, int len )
+void printTightString ( char *tightSeq, int len )
{
- int i;
+ int i;
- for ( i = 0; i < len; i++ )
- {
- fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( tightSeq, i ) ) );
+ for ( i = 0; i < len; i++ )
+ {
+ fprintf ( stderr, "%c", int2base ( ( int ) getCharInTightString ( tightSeq, i ) ) );
- if ( ( i + 1 ) % 100 == 0 && i < len - 1 )
- {
- fprintf ( stderr, "\n" );
- }
- }
+ if ( ( i + 1 ) % 100 == 0 && i < len - 1 )
+ {
+ fprintf ( stderr, "\n" );
+ }
+ }
- fprintf ( stderr, "\n" );
+ fprintf ( stderr, "\n" );
}
/*************************************************
@@ -78,29 +78,32 @@ Output:
Return:
None.
*************************************************/
-void writeChar2tightString ( char nt, char * tightSeq, int pos )
+void writeChar2tightString ( char nt, char *tightSeq, int pos )
{
- char * byte = tightSeq + pos / 4;
-
- switch ( pos % 4 )
- {
- case 0:
- *byte &= 63;
- *byte += nt << 6;
- return;
- case 1:
- *byte &= 207;
- *byte += nt << 4;
- return;
- case 2:
- *byte &= 243;
- *byte += nt << 2;
- return;
- case 3:
- *byte &= 252;
- *byte += nt;
- return;
- }
+ char *byte = tightSeq + pos / 4;
+
+ switch ( pos % 4 )
+ {
+ case 0:
+ *byte &= 63;
+ *byte += nt << 6;
+ return;
+
+ case 1:
+ *byte &= 207;
+ *byte += nt << 4;
+ return;
+
+ case 2:
+ *byte &= 243;
+ *byte += nt << 2;
+ return;
+
+ case 3:
+ *byte &= 252;
+ *byte += nt;
+ return;
+ }
}
/*************************************************
@@ -116,23 +119,26 @@ Output:
Return:
The target base.
*************************************************/
-char getCharInTightString ( char * tightSeq, int pos )
+char getCharInTightString ( char *tightSeq, int pos )
{
- char * byte = tightSeq + pos / 4;
-
- switch ( pos % 4 )
- {
- case 3:
- return ( *byte & 3 );
- case 2:
- return ( *byte & 12 ) >> 2;
- case 1:
- return ( *byte & 48 ) >> 4;
- case 0:
- return ( *byte & 192 ) >> 6;
- }
-
- return 0;
+ char *byte = tightSeq + pos / 4;
+
+ switch ( pos % 4 )
+ {
+ case 3:
+ return ( *byte & 3 );
+
+ case 2:
+ return ( *byte & 12 ) >> 2;
+
+ case 1:
+ return ( *byte & 48 ) >> 4;
+
+ case 0:
+ return ( *byte & 192 ) >> 6;
+ }
+
+ return 0;
}
/*************************************************
@@ -148,21 +154,21 @@ Output:
Return:
None.
*************************************************/
-void reverseComplementSeq ( char * seq, int len, char * bal_seq )
+void reverseComplementSeq ( char *seq, int len, char *bal_seq )
{
- int i, index = 0;
+ int i, index = 0;
- if ( len < 1 )
- {
- return;
- }
+ if ( len < 1 )
+ {
+ return;
+ }
- for ( i = len - 1; i >= 0; i-- )
- {
- bal_seq[index++] = int_comp ( seq[i] );
- }
+ for ( i = len - 1; i >= 0; i-- )
+ {
+ bal_seq[index++] = int_comp ( seq[i] );
+ }
- return;
+ return;
}
/*************************************************
@@ -178,36 +184,36 @@ Output:
Return:
The reversed complement of sequence "seq".
*************************************************/
-char * compl_int_seq ( char * seq, int len )
+char *compl_int_seq ( char *seq, int len )
{
- char * bal_seq = NULL, c, bal_c;
- int i, index;
-
- if ( len < 1 )
- {
- return bal_seq;
- }
-
- bal_seq = ( char * ) ckalloc ( len * sizeof ( char ) );
- index = 0;
-
- for ( i = len - 1; i >= 0; i-- )
- {
- c = seq[i];
-
- if ( c < 4 )
- {
- bal_c = int_comp ( c );
- } //3-c;
- else
- {
- bal_c = c;
- }
-
- bal_seq[index++] = bal_c;
- }
-
- return bal_seq;
+ char *bal_seq = NULL, c, bal_c;
+ int i, index;
+
+ if ( len < 1 )
+ {
+ return bal_seq;
+ }
+
+ bal_seq = ( char * ) ckalloc ( len * sizeof ( char ) );
+ index = 0;
+
+ for ( i = len - 1; i >= 0; i-- )
+ {
+ c = seq[i];
+
+ if ( c < 4 )
+ {
+ bal_c = int_comp ( c );
+ } //3-c;
+ else
+ {
+ bal_c = c;
+ }
+
+ bal_seq[index++] = bal_c;
+ }
+
+ return bal_seq;
}
/*************************************************
@@ -223,18 +229,18 @@ Output:
Return:
The integer.
*************************************************/
-long long trans_seq ( char * seq, int len )
+long long trans_seq ( char *seq, int len )
{
- int i;
- long long res;
- res = 0;
+ int i;
+ long long res;
+ res = 0;
- for ( i = 0; i < len; i++ )
- {
- res = res * 4 + seq[i];
- }
+ for ( i = 0; i < len; i++ )
+ {
+ res = res * 4 + seq[i];
+ }
- return ( res );
+ return ( res );
}
/*
diff --git a/standardPregraph/splitReps.c b/standardPregraph/splitReps.c
index 46ebf6d..8df54f4 100644
--- a/standardPregraph/splitReps.c
+++ b/standardPregraph/splitReps.c
@@ -1,7 +1,7 @@
/*
* splitReps.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -50,40 +50,40 @@ Return:
*************************************************/
static boolean interferingCheck ( unsigned int edgeno, int repTimes )
{
- int i, j, t;
- unsigned int bal_ed;
- involved[0] = edgeno;
- i = 1;
-
- for ( j = 0; j < repTimes; j++ )
- {
- involved[i++] = lefts[j];
- }
-
- for ( j = 0; j < repTimes; j++ )
- {
- involved[i++] = rights[j];
- }
-
- for ( j = 0; j < i - 1; j++ )
- for ( t = j + 1; t < i; t++ )
- if ( involved[j] == involved[t] )
- {
- return 1;
- }
-
- for ( j = 0; j < i; j++ )
- {
- bal_ed = getTwinEdge ( involved[j] );
-
- for ( t = 0; t < i; t++ )
- if ( bal_ed == involved[t] )
- {
- return 1;
- }
- }
-
- return 0;
+ int i, j, t;
+ unsigned int bal_ed;
+ involved[0] = edgeno;
+ i = 1;
+
+ for ( j = 0; j < repTimes; j++ )
+ {
+ involved[i++] = lefts[j];
+ }
+
+ for ( j = 0; j < repTimes; j++ )
+ {
+ involved[i++] = rights[j];
+ }
+
+ for ( j = 0; j < i - 1; j++ )
+ for ( t = j + 1; t < i; t++ )
+ if ( involved[j] == involved[t] )
+ {
+ return 1;
+ }
+
+ for ( j = 0; j < i; j++ )
+ {
+ bal_ed = getTwinEdge ( involved[j] );
+
+ for ( t = 0; t < i; t++ )
+ if ( bal_ed == involved[t] )
+ {
+ return 1;
+ }
+ }
+
+ return 0;
}
/*************************************************
@@ -98,30 +98,30 @@ Output:
Return:
The first downstream arc.
*************************************************/
-static ARC * arcCounts ( unsigned int edgeid, unsigned int * num )
+static ARC *arcCounts ( unsigned int edgeid, unsigned int *num )
{
- ARC * arc;
- ARC * firstValidArc = NULL;
- unsigned int count = 0;
- arc = edge_array[edgeid].arcs;
-
- while ( arc )
- {
- if ( arc->to_ed > 0 )
- {
- count++;
- }
-
- if ( count == 1 )
- {
- firstValidArc = arc;
- }
-
- arc = arc->next;
- }
-
- *num = count;
- return firstValidArc;
+ ARC *arc;
+ ARC *firstValidArc = NULL;
+ unsigned int count = 0;
+ arc = edge_array[edgeid].arcs;
+
+ while ( arc )
+ {
+ if ( arc->to_ed > 0 )
+ {
+ count++;
+ }
+
+ if ( count == 1 )
+ {
+ firstValidArc = arc;
+ }
+
+ arc = arc->next;
+ }
+
+ *num = count;
+ return firstValidArc;
}
/*************************************************
@@ -139,29 +139,29 @@ Return:
*************************************************/
static boolean readOnEdge ( long long readid, unsigned int edge )
{
- int index;
- int markNum;
- long long * marklist;
-
- if ( edge_array[edge].markers )
- {
- markNum = edge_array[edge].multi;
- marklist = edge_array[edge].markers;
- }
- else
- {
- return 0;
- }
-
- for ( index = 0; index < markNum; index++ )
- {
- if ( readid == marklist[index] )
- {
- return 1;
- }
- }
-
- return 0;
+ int index;
+ int markNum;
+ long long *marklist;
+
+ if ( edge_array[edge].markers )
+ {
+ markNum = edge_array[edge].multi;
+ marklist = edge_array[edge].markers;
+ }
+ else
+ {
+ return 0;
+ }
+
+ for ( index = 0; index < markNum; index++ )
+ {
+ if ( readid == marklist[index] )
+ {
+ return 1;
+ }
+ }
+
+ return 0;
}
/*************************************************
@@ -180,37 +180,37 @@ Return:
*************************************************/
static long long cntByReads ( unsigned int left, unsigned int middle, unsigned int right )
{
- int markNum;
- long long * marklist;
-
- if ( edge_array[left].markers )
- {
- markNum = edge_array[left].multi;
- marklist = edge_array[left].markers;
- }
- else
- {
- return 0;
- }
-
- int index;
- long long readid;
-
- /*
- if(middle==8553)
- printf("%d markers on %d\n",markNum,left);
- */
- for ( index = 0; index < markNum; index++ )
- {
- readid = marklist[index];
-
- if ( readOnEdge ( readid, middle ) && readOnEdge ( readid, right ) )
- {
- return readid;
- }
- }
-
- return 0;
+ int markNum;
+ long long *marklist;
+
+ if ( edge_array[left].markers )
+ {
+ markNum = edge_array[left].multi;
+ marklist = edge_array[left].markers;
+ }
+ else
+ {
+ return 0;
+ }
+
+ int index;
+ long long readid;
+
+ /*
+ if(middle==8553)
+ printf("%d markers on %d\n",markNum,left);
+ */
+ for ( index = 0; index < markNum; index++ )
+ {
+ readid = marklist[index];
+
+ if ( readOnEdge ( readid, middle ) && readOnEdge ( readid, right ) )
+ {
+ return readid;
+ }
+ }
+
+ return 0;
}
/*
@@ -236,154 +236,154 @@ Return:
*************************************************/
unsigned int solvable ( unsigned int edgeno )
{
- if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 )
- {
- return 0;
- }
-
- unsigned int bal_ed = getTwinEdge ( edgeno );
- unsigned int arcRight_n, arcLeft_n;
- unsigned int counter;
- unsigned int i, j;
- unsigned int branch, bal_branch;
- ARC * parcL, *parcR;
- parcL = arcCounts ( bal_ed, &arcLeft_n );
-
- if ( arcLeft_n < 2 )
- {
- return 0;
- }
-
- parcR = arcCounts ( edgeno, &arcRight_n );
-
- if ( arcLeft_n != arcRight_n )
- {
- return 0;
- }
-
- // check each right branch only has one upsteam connection
- /*
- if(edgeno==2551){
- for(i=0;i<arcLeft_n;i++)
- printf("%d,",lefts[i]);
- printf("__left to %d\n",edgeno);
- for(j=0;j<arcRight_n;j++)
- printf("%d,",rights[j]);
- printf("__right to %d\n",edgeno);
- }
- */
- arcRight_n = 0;
-
- while ( parcR )
- {
- if ( parcR->to_ed == 0 )
- {
- parcR = parcR->next;
- continue;
- }
-
- branch = parcR->to_ed;
-
- if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
- {
- return 0;
- }
-
- rights[arcRight_n++] = branch;
-
- if ( arcRight_n >= MAX_BRANCH_NUM )
- {
- fprintf ( stderr, "ERROR: right arc number is bigger than the max %d.\n", MAX_BRANCH_NUM );
- exit ( -1 );
- }
-
- bal_branch = getTwinEdge ( branch );
- arcCounts ( bal_branch, &counter );
-
- if ( counter != 1 )
- {
- return 0;
- }
-
- parcR = parcR->next;
- }
-
- // check if each left branch only has one downsteam connection
- arcLeft_n = 0;
-
- while ( parcL )
- {
- if ( parcL->to_ed == 0 )
- {
- parcL = parcL->next;
- continue;
- }
-
- branch = parcL->to_ed;
-
- if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
- {
- return 0;
- }
-
- bal_branch = getTwinEdge ( branch );
- lefts[arcLeft_n++] = bal_branch;
-
- if ( arcLeft_n >= MAX_BRANCH_NUM )
- {
- fprintf ( stderr, "ERROR: left arc number is bigger than the max %d.\n", MAX_BRANCH_NUM );
- exit ( -1 );
- }
-
- arcCounts ( bal_branch, &counter );
-
- if ( counter != 1 )
- {
- return 0;
- }
-
- parcL = parcL->next;
- }
-
- //check if reads indicate one to one connection between upsteam and downstream edges
-
- for ( i = 0; i < arcLeft_n; i++ )
- {
- counter = 0;
-
- for ( j = 0; j < arcRight_n; j++ )
- {
- gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 0 : 1;
- counter += gothrough[i][j];
-
- if ( counter > 1 )
- {
- return 0;
- }
- }
-
- if ( counter != 1 )
- {
- return 0;
- }
- }
-
- for ( j = 0; j < arcRight_n; j++ )
- {
- counter = 0;
-
- for ( i = 0; i < arcLeft_n; i++ )
- {
- counter += gothrough[i][j];
- }
-
- if ( counter != 1 )
- {
- return 0;
- }
- }
-
- return arcLeft_n;
+ if ( EdSameAsTwin ( edgeno ) || edge_array[edgeno].multi == 255 )
+ {
+ return 0;
+ }
+
+ unsigned int bal_ed = getTwinEdge ( edgeno );
+ unsigned int arcRight_n, arcLeft_n;
+ unsigned int counter;
+ unsigned int i, j;
+ unsigned int branch, bal_branch;
+ ARC *parcL, *parcR;
+ parcL = arcCounts ( bal_ed, &arcLeft_n );
+
+ if ( arcLeft_n < 2 )
+ {
+ return 0;
+ }
+
+ parcR = arcCounts ( edgeno, &arcRight_n );
+
+ if ( arcLeft_n != arcRight_n )
+ {
+ return 0;
+ }
+
+ // check each right branch only has one upsteam connection
+ /*
+ if(edgeno==2551){
+ for(i=0;i<arcLeft_n;i++)
+ printf("%d,",lefts[i]);
+ printf("__left to %d\n",edgeno);
+ for(j=0;j<arcRight_n;j++)
+ printf("%d,",rights[j]);
+ printf("__right to %d\n",edgeno);
+ }
+ */
+ arcRight_n = 0;
+
+ while ( parcR )
+ {
+ if ( parcR->to_ed == 0 )
+ {
+ parcR = parcR->next;
+ continue;
+ }
+
+ branch = parcR->to_ed;
+
+ if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
+ {
+ return 0;
+ }
+
+ rights[arcRight_n++] = branch;
+
+ if ( arcRight_n >= MAX_BRANCH_NUM )
+ {
+ fprintf ( stderr, "ERROR: right arc number is bigger than the max %d.\n", MAX_BRANCH_NUM );
+ exit ( -1 );
+ }
+
+ bal_branch = getTwinEdge ( branch );
+ arcCounts ( bal_branch, &counter );
+
+ if ( counter != 1 )
+ {
+ return 0;
+ }
+
+ parcR = parcR->next;
+ }
+
+ // check if each left branch only has one downsteam connection
+ arcLeft_n = 0;
+
+ while ( parcL )
+ {
+ if ( parcL->to_ed == 0 )
+ {
+ parcL = parcL->next;
+ continue;
+ }
+
+ branch = parcL->to_ed;
+
+ if ( EdSameAsTwin ( branch ) || edge_array[branch].multi == 255 )
+ {
+ return 0;
+ }
+
+ bal_branch = getTwinEdge ( branch );
+ lefts[arcLeft_n++] = bal_branch;
+
+ if ( arcLeft_n >= MAX_BRANCH_NUM )
+ {
+ fprintf ( stderr, "ERROR: left arc number is bigger than the max %d.\n", MAX_BRANCH_NUM );
+ exit ( -1 );
+ }
+
+ arcCounts ( bal_branch, &counter );
+
+ if ( counter != 1 )
+ {
+ return 0;
+ }
+
+ parcL = parcL->next;
+ }
+
+ //check if reads indicate one to one connection between upsteam and downstream edges
+
+ for ( i = 0; i < arcLeft_n; i++ )
+ {
+ counter = 0;
+
+ for ( j = 0; j < arcRight_n; j++ )
+ {
+ gothrough[i][j] = cntByReads ( lefts[i], edgeno, rights[j] ) == 0 ? 0 : 1;
+ counter += gothrough[i][j];
+
+ if ( counter > 1 )
+ {
+ return 0;
+ }
+ }
+
+ if ( counter != 1 )
+ {
+ return 0;
+ }
+ }
+
+ for ( j = 0; j < arcRight_n; j++ )
+ {
+ counter = 0;
+
+ for ( i = 0; i < arcLeft_n; i++ )
+ {
+ counter += gothrough[i][j];
+ }
+
+ if ( counter != 1 )
+ {
+ return 0;
+ }
+ }
+
+ return arcLeft_n;
}
/*************************************************
@@ -401,59 +401,59 @@ Return:
*************************************************/
static unsigned int cp1edge ( unsigned int source, unsigned int target )
{
- int length = edge_array[source].length;
- char * tightSeq;
- int index;
- unsigned int bal_source = getTwinEdge ( source );
- unsigned int bal_target;
-
- if ( bal_source > source )
- {
- bal_target = target + 1;
- }
- else
- {
- bal_target = target;
- target = target + 1;
- }
-
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
-
- for ( index = 0; index < length / 4 + 1; index++ )
- {
- tightSeq[index] = edge_array[source].seq[index];
- }
-
- edge_array[target].length = length;
- edge_array[target].cvg = edge_array[source].cvg;
- edge_array[target].to_vt = edge_array[source].to_vt;
- edge_array[target].from_vt = edge_array[source].from_vt;
- edge_array[target].seq = tightSeq;
- edge_array[target].bal_edge = edge_array[source].bal_edge;
- edge_array[target].rv = NULL;
- edge_array[target].arcs = NULL;
- edge_array[target].markers = NULL;
- edge_array[target].flag = 0;
- edge_array[target].deleted = 0;
- tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
-
- for ( index = 0; index < length / 4 + 1; index++ )
- {
- tightSeq[index] = edge_array[bal_source].seq[index];
- }
-
- edge_array[bal_target].length = length;
- edge_array[bal_target].cvg = edge_array[bal_source].cvg;
- edge_array[bal_target].to_vt = edge_array[bal_source].to_vt;
- edge_array[bal_target].from_vt = edge_array[bal_source].from_vt;
- edge_array[bal_target].seq = tightSeq;
- edge_array[bal_target].bal_edge = edge_array[bal_source].bal_edge;
- edge_array[bal_target].rv = NULL;
- edge_array[bal_target].arcs = NULL;
- edge_array[bal_target].markers = NULL;
- edge_array[bal_target].flag = 0;
- edge_array[bal_target].deleted = 0;
- return target;
+ int length = edge_array[source].length;
+ char *tightSeq;
+ int index;
+ unsigned int bal_source = getTwinEdge ( source );
+ unsigned int bal_target;
+
+ if ( bal_source > source )
+ {
+ bal_target = target + 1;
+ }
+ else
+ {
+ bal_target = target;
+ target = target + 1;
+ }
+
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+
+ for ( index = 0; index < length / 4 + 1; index++ )
+ {
+ tightSeq[index] = edge_array[source].seq[index];
+ }
+
+ edge_array[target].length = length;
+ edge_array[target].cvg = edge_array[source].cvg;
+ edge_array[target].to_vt = edge_array[source].to_vt;
+ edge_array[target].from_vt = edge_array[source].from_vt;
+ edge_array[target].seq = tightSeq;
+ edge_array[target].bal_edge = edge_array[source].bal_edge;
+ edge_array[target].rv = NULL;
+ edge_array[target].arcs = NULL;
+ edge_array[target].markers = NULL;
+ edge_array[target].flag = 0;
+ edge_array[target].deleted = 0;
+ tightSeq = ( char * ) ckalloc ( ( length / 4 + 1 ) * sizeof ( char ) );
+
+ for ( index = 0; index < length / 4 + 1; index++ )
+ {
+ tightSeq[index] = edge_array[bal_source].seq[index];
+ }
+
+ edge_array[bal_target].length = length;
+ edge_array[bal_target].cvg = edge_array[bal_source].cvg;
+ edge_array[bal_target].to_vt = edge_array[bal_source].to_vt;
+ edge_array[bal_target].from_vt = edge_array[bal_source].from_vt;
+ edge_array[bal_target].seq = tightSeq;
+ edge_array[bal_target].bal_edge = edge_array[bal_source].bal_edge;
+ edge_array[bal_target].rv = NULL;
+ edge_array[bal_target].arcs = NULL;
+ edge_array[bal_target].markers = NULL;
+ edge_array[bal_target].flag = 0;
+ edge_array[bal_target].deleted = 0;
+ return target;
}
/*************************************************
@@ -473,58 +473,58 @@ Return:
*************************************************/
static void moveArc2cp ( unsigned int leftEd, unsigned int rightEd, unsigned int source, unsigned int target )
{
- unsigned int bal_left = getTwinEdge ( leftEd );
- unsigned int bal_right = getTwinEdge ( rightEd );
- unsigned int bal_source = getTwinEdge ( source );
- unsigned int bal_target = getTwinEdge ( target );
- ARC * arc;
- ARC * newArc, *twinArc;
- //between left and source
- arc = getArcBetween ( leftEd, source );
- arc->to_ed = 0;
- newArc = allocateArc ( target );
- newArc->multiplicity = arc->multiplicity;
- newArc->prev = NULL;
- newArc->next = edge_array[leftEd].arcs;
-
- if ( edge_array[leftEd].arcs )
- {
- edge_array[leftEd].arcs->prev = newArc;
- }
-
- edge_array[leftEd].arcs = newArc;
- arc = getArcBetween ( bal_source, bal_left );
- arc->to_ed = 0;
- twinArc = allocateArc ( bal_left );
- twinArc->multiplicity = arc->multiplicity;
- twinArc->prev = NULL;
- twinArc->next = NULL;
- edge_array[bal_target].arcs = twinArc;
- newArc->bal_arc = twinArc;
- twinArc->bal_arc = newArc;
- //between source and right
- arc = getArcBetween ( source, rightEd );
- arc->to_ed = 0;
- newArc = allocateArc ( rightEd );
- newArc->multiplicity = arc->multiplicity;
- newArc->prev = NULL;
- newArc->next = NULL;
- edge_array[target].arcs = newArc;
- arc = getArcBetween ( bal_right, bal_source );
- arc->to_ed = 0;
- twinArc = allocateArc ( bal_target );
- twinArc->multiplicity = arc->multiplicity;
- twinArc->prev = NULL;
- twinArc->next = edge_array[bal_right].arcs;
-
- if ( edge_array[bal_right].arcs )
- {
- edge_array[bal_right].arcs->prev = twinArc;
- }
-
- edge_array[bal_right].arcs = twinArc;
- newArc->bal_arc = twinArc;
- twinArc->bal_arc = newArc;
+ unsigned int bal_left = getTwinEdge ( leftEd );
+ unsigned int bal_right = getTwinEdge ( rightEd );
+ unsigned int bal_source = getTwinEdge ( source );
+ unsigned int bal_target = getTwinEdge ( target );
+ ARC *arc;
+ ARC *newArc, *twinArc;
+ //between left and source
+ arc = getArcBetween ( leftEd, source );
+ arc->to_ed = 0;
+ newArc = allocateArc ( target );
+ newArc->multiplicity = arc->multiplicity;
+ newArc->prev = NULL;
+ newArc->next = edge_array[leftEd].arcs;
+
+ if ( edge_array[leftEd].arcs )
+ {
+ edge_array[leftEd].arcs->prev = newArc;
+ }
+
+ edge_array[leftEd].arcs = newArc;
+ arc = getArcBetween ( bal_source, bal_left );
+ arc->to_ed = 0;
+ twinArc = allocateArc ( bal_left );
+ twinArc->multiplicity = arc->multiplicity;
+ twinArc->prev = NULL;
+ twinArc->next = NULL;
+ edge_array[bal_target].arcs = twinArc;
+ newArc->bal_arc = twinArc;
+ twinArc->bal_arc = newArc;
+ //between source and right
+ arc = getArcBetween ( source, rightEd );
+ arc->to_ed = 0;
+ newArc = allocateArc ( rightEd );
+ newArc->multiplicity = arc->multiplicity;
+ newArc->prev = NULL;
+ newArc->next = NULL;
+ edge_array[target].arcs = newArc;
+ arc = getArcBetween ( bal_right, bal_source );
+ arc->to_ed = 0;
+ twinArc = allocateArc ( bal_target );
+ twinArc->multiplicity = arc->multiplicity;
+ twinArc->prev = NULL;
+ twinArc->next = edge_array[bal_right].arcs;
+
+ if ( edge_array[bal_right].arcs )
+ {
+ edge_array[bal_right].arcs->prev = twinArc;
+ }
+
+ edge_array[bal_right].arcs = twinArc;
+ newArc->bal_arc = twinArc;
+ twinArc->bal_arc = newArc;
}
/*************************************************
@@ -542,38 +542,38 @@ Return:
*************************************************/
static void split1edge ( unsigned int edgeno, int repTimes )
{
- int i, j;
- unsigned int target;
-
- for ( i = 1; i < repTimes; i++ )
- {
- for ( j = 0; j < repTimes; j++ )
- if ( gothrough[i][j] > 0 ) // a path supported by read
- {
- break;
- }
-
- target = cp1edge ( edgeno, extraEdgeNum );
- moveArc2cp ( lefts[i], rights[j], edgeno, target );
- extraEdgeNum += 2;
- }
+ int i, j;
+ unsigned int target;
+
+ for ( i = 1; i < repTimes; i++ )
+ {
+ for ( j = 0; j < repTimes; j++ )
+ if ( gothrough[i][j] > 0 ) // a path supported by read
+ {
+ break;
+ }
+
+ target = cp1edge ( edgeno, extraEdgeNum );
+ moveArc2cp ( lefts[i], rights[j], edgeno, target );
+ extraEdgeNum += 2;
+ }
}
static void debugging ( unsigned int i )
{
- ARC * parc;
- parc = edge_array[i].arcs;
-
- if ( !parc )
- {
- fprintf ( stderr, "No downward connection for %d.\n", i );
- }
-
- while ( parc )
- {
- fprintf ( stderr, "%d -> %d\n", i, parc->to_ed );
- parc = parc->next;
- }
+ ARC *parc;
+ parc = edge_array[i].arcs;
+
+ if ( !parc )
+ {
+ fprintf ( stderr, "No downward connection for %d.\n", i );
+ }
+
+ while ( parc )
+ {
+ fprintf ( stderr, "%d -> %d\n", i, parc->to_ed );
+ parc = parc->next;
+ }
}
/*************************************************
@@ -590,45 +590,45 @@ Return:
*************************************************/
void solveReps ()
{
- unsigned int i;
- unsigned int repTime;
- int counter = 0;
- boolean flag;
- //debugging(30514);
- extraEdgeNum = num_ed + 1;
-
- for ( i = 1; i <= num_ed; i++ )
- {
- repTime = solvable ( i );
-
- if ( repTime == 0 )
- {
- continue;
- }
-
- flag = interferingCheck ( i, repTime );
-
- if ( flag )
- {
- continue;
- }
-
- split1edge ( i, repTime );
- counter++; //+= 2*(repTime-1);
-
- if ( EdSmallerThanTwin ( i ) )
- {
- i++;
- }
- }
-
- fprintf ( stderr, "%d repeat(s) are solvable, %d more edge(s).\n", counter, extraEdgeNum - 1 - num_ed );
- num_ed = extraEdgeNum - 1;
- removeDeadArcs ();
-
- if ( markersArray )
- {
- free ( ( void * ) markersArray );
- markersArray = NULL;
- }
+ unsigned int i;
+ unsigned int repTime;
+ int counter = 0;
+ boolean flag;
+ //debugging(30514);
+ extraEdgeNum = num_ed + 1;
+
+ for ( i = 1; i <= num_ed; i++ )
+ {
+ repTime = solvable ( i );
+
+ if ( repTime == 0 )
+ {
+ continue;
+ }
+
+ flag = interferingCheck ( i, repTime );
+
+ if ( flag )
+ {
+ continue;
+ }
+
+ split1edge ( i, repTime );
+ counter++; //+= 2*(repTime-1);
+
+ if ( EdSmallerThanTwin ( i ) )
+ {
+ i++;
+ }
+ }
+
+ fprintf ( stderr, "%d repeat(s) are solvable, %d more edge(s).\n", counter, extraEdgeNum - 1 - num_ed );
+ num_ed = extraEdgeNum - 1;
+ removeDeadArcs ();
+
+ if ( markersArray )
+ {
+ free ( ( void * ) markersArray );
+ markersArray = NULL;
+ }
}
diff --git a/standardPregraph/stack.c b/standardPregraph/stack.c
index 04f2b6e..d48fa09 100644
--- a/standardPregraph/stack.c
+++ b/standardPregraph/stack.c
@@ -1,7 +1,7 @@
/*
* stack.c
*
- * Copyright (c) 2008-2012 BGI-Shenzhen <soap at genomics dot org dot cn>.
+ * Copyright (c) 2008-2016 Ruibang Luo <aquaskyline.com>.
*
* This file is part of SOAPdenovo.
*
@@ -22,144 +22,144 @@
#include "stack.h"
-STACK * createStack ( int num_items, size_t unit_size )
+STACK *createStack ( int num_items, size_t unit_size )
{
- STACK * newStack = ( STACK * ) malloc ( 1 * sizeof ( STACK ) );
- newStack->block_list = NULL;
- newStack->items_per_block = num_items;
- newStack->item_size = unit_size;
- newStack->item_c = 0;
- return newStack;
+ STACK *newStack = ( STACK * ) malloc ( 1 * sizeof ( STACK ) );
+ newStack->block_list = NULL;
+ newStack->items_per_block = num_items;
+ newStack->item_size = unit_size;
+ newStack->item_c = 0;
+ return newStack;
}
-void emptyStack ( STACK * astack )
+void emptyStack ( STACK *astack )
{
- BLOCK_STARTER * block;
+ BLOCK_STARTER *block;
- if ( !astack || !astack->block_list )
- {
- return;
- }
+ if ( !astack || !astack->block_list )
+ {
+ return;
+ }
- block = astack->block_list;
+ block = astack->block_list;
- if ( block->next )
- {
- block = block->next;
- }
+ if ( block->next )
+ {
+ block = block->next;
+ }
- astack->block_list = block;
- astack->item_c = 0;
- astack->index_in_block = 0;
+ astack->block_list = block;
+ astack->item_c = 0;
+ astack->index_in_block = 0;
}
-void freeStack ( STACK * astack )
+void freeStack ( STACK *astack )
{
- BLOCK_STARTER * ite_block, *temp_block;
-
- if ( !astack )
- {
- return;
- }
-
- ite_block = astack->block_list;
-
- if ( ite_block )
- {
- while ( ite_block->next )
- {
- ite_block = ite_block->next;
- }
- }
-
- while ( ite_block )
- {
- temp_block = ite_block;
- ite_block = ite_block->prev;
- free ( ( void * ) temp_block );
- }
-
- free ( ( void * ) astack );
+ BLOCK_STARTER *ite_block, *temp_block;
+
+ if ( !astack )
+ {
+ return;
+ }
+
+ ite_block = astack->block_list;
+
+ if ( ite_block )
+ {
+ while ( ite_block->next )
+ {
+ ite_block = ite_block->next;
+ }
+ }
+
+ while ( ite_block )
+ {
+ temp_block = ite_block;
+ ite_block = ite_block->prev;
+ free ( ( void * ) temp_block );
+ }
+
+ free ( ( void * ) astack );
}
-void stackBackup ( STACK * astack )
+void stackBackup ( STACK *astack )
{
- astack->block_backup = astack->block_list;
- astack->index_backup = astack->index_in_block;
- astack->item_c_backup = astack->item_c;
+ astack->block_backup = astack->block_list;
+ astack->index_backup = astack->index_in_block;
+ astack->item_c_backup = astack->item_c;
}
-void stackRecover ( STACK * astack )
+void stackRecover ( STACK *astack )
{
- astack->block_list = astack->block_backup;
- astack->index_in_block = astack->index_backup;
- astack->item_c = astack->item_c_backup;
+ astack->block_list = astack->block_backup;
+ astack->index_in_block = astack->index_backup;
+ astack->item_c = astack->item_c_backup;
}
-void * stackPop ( STACK * astack )
+void *stackPop ( STACK *astack )
{
- BLOCK_STARTER * block;
-
- if ( !astack || !astack->block_list || !astack->item_c )
- {
- return NULL;
- }
-
- astack->item_c--;
- block = astack->block_list;
-
- if ( astack->index_in_block == 1 )
- {
- if ( block->next )
- {
- astack->block_list = block->next;
- astack->index_in_block = astack->items_per_block;
- }
- else
- {
- astack->index_in_block = 0;
- astack->item_c = 0;
- }
-
- return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) );
- }
-
- return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) + astack->item_size * ( --astack->index_in_block ) );
+ BLOCK_STARTER *block;
+
+ if ( !astack || !astack->block_list || !astack->item_c )
+ {
+ return NULL;
+ }
+
+ astack->item_c--;
+ block = astack->block_list;
+
+ if ( astack->index_in_block == 1 )
+ {
+ if ( block->next )
+ {
+ astack->block_list = block->next;
+ astack->index_in_block = astack->items_per_block;
+ }
+ else
+ {
+ astack->index_in_block = 0;
+ astack->item_c = 0;
+ }
+
+ return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) );
+ }
+
+ return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) + astack->item_size * ( --astack->index_in_block ) );
}
-void * stackPush ( STACK * astack )
+void *stackPush ( STACK *astack )
{
- BLOCK_STARTER * block;
-
- if ( !astack )
- {
- return NULL;
- }
-
- astack->item_c++;
-
- if ( !astack->block_list || ( astack->index_in_block == astack->items_per_block && !astack->block_list->prev ) )
- {
- block = malloc ( sizeof ( BLOCK_STARTER ) + astack->items_per_block * astack->item_size );
- block->prev = NULL;
-
- if ( astack->block_list )
- {
- astack->block_list->prev = block;
- }
-
- block->next = astack->block_list;
- astack->block_list = block;
- astack->index_in_block = 1;
- return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) );
- }
- else if ( astack->index_in_block == astack->items_per_block && astack->block_list->prev )
- {
- astack->block_list = astack->block_list->prev;
- astack->index_in_block = 1;
- return ( void * ) ( ( void * ) astack->block_list + sizeof ( BLOCK_STARTER ) );
- }
-
- block = astack->block_list;
- return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) + astack->item_size * astack->index_in_block++ );
+ BLOCK_STARTER *block;
+
+ if ( !astack )
+ {
+ return NULL;
+ }
+
+ astack->item_c++;
+
+ if ( !astack->block_list || ( astack->index_in_block == astack->items_per_block && !astack->block_list->prev ) )
+ {
+ block = malloc ( sizeof ( BLOCK_STARTER ) + astack->items_per_block * astack->item_size );
+ block->prev = NULL;
+
+ if ( astack->block_list )
+ {
+ astack->block_list->prev = block;
+ }
+
+ block->next = astack->block_list;
+ astack->block_list = block;
+ astack->index_in_block = 1;
+ return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) );
+ }
+ else if ( astack->index_in_block == astack->items_per_block && astack->block_list->prev )
+ {
+ astack->block_list = astack->block_list->prev;
+ astack->index_in_block = 1;
+ return ( void * ) ( ( void * ) astack->block_list + sizeof ( BLOCK_STARTER ) );
+ }
+
+ block = astack->block_list;
+ return ( void * ) ( ( void * ) block + sizeof ( BLOCK_STARTER ) + astack->item_size * astack->index_in_block++ );
}
diff --git a/update.log b/update.log
deleted file mode 100644
index 5f6e8b5..0000000
--- a/update.log
+++ /dev/null
@@ -1,91 +0,0 @@
-r240 | 2013-07-09 11:30:03 +0800 (Tue, 09 Jul 2013)
-
-Fix a bug in reading files in 'map' step. This bug might lead to seg fault.
-
-------------------------------------------------------------------------
-r239 | 2013-06-26 09:41:39 +0800 (Wed, 26 Jun 2013)
-
-1) Fix the bug of reading fasta file in map step. This bug was introduced when
- fixing a bug of reading fastq file in r238.
-
-------------------------------------------------------------------------
-r224 - r238 | 2013-06-13
-
-1) Fix a serious bug in 'map' step of version r223. This bug can lead
- to incorrect pairing of PE reads in LIB of even order, e.g., the
- 2nd LIB, the 4th LIB and so on...And these affected LIBs may not
- contribute to the construction of scaffold.
-2) Merge 'standPregraph' and 'sparsePregraph'. Now, there are only two
- executable programs: SOAPdenovo-63mer and SOAPdenovo-127mer. User
- can choose to use 'pregraph' for standard Kmer graph or
- 'sparse_pregraph' for sparse Kmer graph.
-3) Add an option for debug version compilation. User can use
- 'make debug=1' to obtain programs for debug.
-4) Fix a bug in sorting edges in 'contig' step.
-5) Fix a bug in reading files when using multi-kmer. Now the
- 'max_read_length' will change according to the LIB being red.
-
-------------------------------------------------------------------------
-r223 | 2012-12-28 10:11:43 +0800 (Fri, 28 Dec 2012)
-
-Fix the problem that parameter k doesn't work when k is larger than 63
-for 127mer version.
-
-------------------------------------------------------------------------
-r222 | 2012-12-21 14:45:49 +0800 (Fri, 21 Dec 2012)
-
-1) Change some codes so that program can handle reads longer than 5000.
-2) Add a new perl script which can seperate singletons from scaffolds in
- *.scafSeq file.
-
-------------------------------------------------------------------------
-r221 | 2012-12-07 14:27:02 +0800 (Fri, 07 Dec 2012)
-
-Fix a bug in reading files which might cause zombie process.
-
-------------------------------------------------------------------------
-r220 | 2012-11-26 10:09:45 +0800 (Mon, 26 Nov 2012)
-
-Fix bug in aio that the buffer was not enough for fq for long reads.
-
-------------------------------------------------------------------------
-r219 | 2012-11-08 12:58:45 +0800 (Thu, 08 Nov 2012)
-
-Fix a bug that using -r 1 will casuse the infomation loss of MaxReadLen
-and MinReadLen in *.preGraphBasic file in pregraph_sparse module.
-
-------------------------------------------------------------------------
-r218 | 2012-11-08 11:04:54 +0800 (Thu, 08 Nov 2012)
-
-Output palindrome sequence only once now instead of twice before in
-pregraph_sparse module.
-
-------------------------------------------------------------------------
-r217 | 2012-11-01 13:09:50 +0800 (Thu, 01 Nov 2012)
-
-Fix bug in scaffolding which may lead to scaffold consisting of none
-or only one contig.
-
-------------------------------------------------------------------------
-r216 | 2012-10-31 14:53:29 +0800 (Wed, 31 Oct 2012)
-
-Fix a bug of 'pregraph-sparse' which may lead to segmentation fault in
-'contig' step if option -R is set and there are reads longer than 100bp.
-
-------------------------------------------------------------------------
-r215 | 2012-10-16 18:53:28 +0800 (Tue, 16 Oct 2012)
-
-Fix a bug of aio which happens rarely in 'pregraph' step when there are
-reads shorter than Kmer.
-
-------------------------------------------------------------------------
-r214 | 2012-10-08 15:58:09 +0800 (Mon, 08 Oct 2012)
-
-Modify usage description of '-V'.
-
-------------------------------------------------------------------------
-r213 | 2012-09-29 09:24:32 +0800 (Sat, 29 Sep 2012)
-
-Fix a bug which might happen in 'contig' step if the 'pregraph-sparse' is
-used to replace the regular 'pregraph'.
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/soapdenovo2.git
More information about the debian-med-commit
mailing list