[med-svn] [Git][med-team/soapdenovo2][upstream] New upstream version 242+dfsg

Nilesh Patra gitlab at salsa.debian.org
Tue Oct 27 14:54:03 GMT 2020



Nilesh Patra pushed to branch upstream at Debian Med / soapdenovo2


Commits:
2b05218e by Nilesh Patra at 2020-10-27T19:30:23+05:30
New upstream version 242+dfsg
- - - - -


26 changed files:

- + .travis.yml
- Makefile
- README.md
- + example.config
- fusion/check.c
- fusion/orderContig.c
- sparsePregraph/Makefile
- sparsePregraph/build_graph.cpp
- sparsePregraph/build_preArc.cpp
- sparsePregraph/inc/core.h
- standardPregraph/Makefile
- standardPregraph/attachPEinfo.c
- standardPregraph/inc/global.h
- standardPregraph/inc/newhash.h
- standardPregraph/iterate.c
- standardPregraph/loadPreGraph.c
- standardPregraph/map.c
- standardPregraph/newhash.c
- standardPregraph/node2edge.c
- standardPregraph/orderContig.c
- standardPregraph/prlHashCtg.c
- standardPregraph/prlHashReads.c
- standardPregraph/prlRead2Ctg.c
- standardPregraph/prlRead2path.c
- standardPregraph/read2edge.c
- standardPregraph/readseq1by1.c


Changes:

=====================================
.travis.yml
=====================================
@@ -0,0 +1,8 @@
+language: cpp
+
+compiler:
+    - gcc
+
+
+install: true
+script: make 
\ No newline at end of file


=====================================
Makefile
=====================================
@@ -1,7 +1,7 @@
 MAKEFLAGS += --no-print-directory
 CC = g++
 ifdef debug
-CFLAGS=         -O0 -g -fomit-frame-pointer
+CFLAGS=         -O0 -g -fomit-frame-pointer -DDEBUG
 else
 CFLAGS=         -O3 -fomit-frame-pointer
 endif
@@ -10,7 +10,7 @@ SUBDIRS = sparsePregraph standardPregraph fusion
 PROG=       SOAPdenovo-63mer SOAPdenovo-127mer SOAPdenovo-fusion
 INCLUDES=   -I./sparsePregraph/inc -I./standardPregraph/inc
 
-LIBPATH=    -L/lib64 -L/usr/lib64 -L./sparsePregraph/inc -L./standardPregraph/inc
+LIBPATH=    -L./sparsePregraph/inc -L./standardPregraph/inc -L/lib64 -L/usr/lib64
 LIBS=       -pthread -lz -lm
 EXTRA_FLAGS= 
 
@@ -44,21 +44,21 @@ SOAPdenovo-fusion:
 
 ifdef debug
 SOAPdenovo-63mer:
-	@cd sparsePregraph;make 63mer=1 debug=1;cd ..;
-	@cd standardPregraph;make 63mer=1 debug=1;cd ..;
+	@cd sparsePregraph;make 63mer=1 debug=1 clean all;cd ..;
+	@cd standardPregraph;make 63mer=1 debug=1 clean all;cd ..;
 	@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-63mer
 SOAPdenovo-127mer:
-	@cd sparsePregraph;make 127mer=1 debug=1;cd ..;
-	@cd standardPregraph;make 127mer=1 debug=1;cd ..;
+	@cd sparsePregraph;make 127mer=1 debug=1 clean all;cd ..;
+	@cd standardPregraph;make 127mer=1 debug=1 clean all;cd ..;
 	@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
 else
 SOAPdenovo-63mer:
-	@cd sparsePregraph;make 63mer=1;cd ..;
-	@cd standardPregraph;make 63mer=1;cd ..;
+	@cd sparsePregraph;make 63mer=1 clean all;cd ..;
+	@cd standardPregraph;make 63mer=1 clean all;cd ..;
 	@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-63mer
 SOAPdenovo-127mer:
-	@cd sparsePregraph;make 127mer=1;cd ..;
-	@cd standardPregraph;make 127mer=1;cd ..;
+	@cd sparsePregraph;make 127mer=1 clean all;cd ..;
+	@cd standardPregraph;make 127mer=1 clean all;cd ..;
 	@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
 endif
 


=====================================
README.md
=====================================
@@ -1,3 +1,5 @@
+![Build Status](https://www.travis-ci.org/cchd0001/SOAPdenovo2.svg?branch=master)
+
 # Manual of SOAPdenovo2
 
 ## About MEGAHIT
@@ -16,7 +18,7 @@ MEGAHIT: An ultra-fast single-node solution for large and complex metagenomics a
 
 ## For MAC users
 
-Please use <a href="http://brew.sh">brew</a> to install SOAPdenovo. SOAPdenovo's package in Homebrew-science is managed by Shaun Jackman. 
+Please use <a href="http://brew.sh">brew</a> to install SOAPdenovo. SOAPdenovo's package in Homebrew-science is managed by Shaun Jackman. 
 
 ## Introduction
 
@@ -28,7 +30,7 @@ SOAPdenovo aims for large plant and animal genomes, although it also works well
 
 ## Installation
 1. You can download the pre-compiled binary according to your platform, unpack using "tar -zxf  ${destination folder} download.tgz" and execute directly.
-2. Or download the source code, unpack to ${destination folder} with the method above, and compile by using GNU make with command "make" at ${destination folder}/SOAPdenovo-V2.04. Then install executable to ${destination folder}/SOAPdenovo-V2.04/bin using "make install"
+2. Or download the source code, unpack to ${destination folder} with the method above, and compile by using GNU make with command "make" at ${destination folder}/SOAPdenovo-V2.04.
 
 ## How to use it
 


=====================================
example.config
=====================================
@@ -0,0 +1,61 @@
+#maximal read length
+max_rd_len=100
+[LIB]
+#average insert size
+avg_ins=200
+#if sequence needs to be reversed
+reverse_seq=0
+#in which part(s) the reads are used
+asm_flags=3
+#use only first 100 bps of each read
+rd_len_cutoff=100
+#in which order the reads are used while scaffolding
+rank=1
+# cutoff of pair number for a reliable connection (at least 3 for short insert size)
+pair_num_cutoff=3
+#minimum aligned length to contigs for a reliable read location (at least 32 for short insert size)
+map_len=32
+#a pair of fastq file, read 1 file should always be followed by read 2 file
+q1=/path/**LIBNAMEA**/fastq1_read_1.fq
+q2=/path/**LIBNAMEA**/fastq1_read_2.fq
+#another pair of fastq file, read 1 file should always be followed by read 2 file
+q1=/path/**LIBNAMEA**/fastq2_read_1.fq
+q2=/path/**LIBNAMEA**/fastq2_read_2.fq
+#a pair of fasta file, read 1 file should always be followed by read 2 file
+f1=/path/**LIBNAMEA**/fasta1_read_1.fa
+f2=/path/**LIBNAMEA**/fasta1_read_2.fa
+#another pair of fasta file, read 1 file should always be followed by read 2 file
+f1=/path/**LIBNAMEA**/fasta2_read_1.fa
+f2=/path/**LIBNAMEA**/fasta2_read_2.fa
+#fastq file for single reads
+q=/path/**LIBNAMEA**/fastq1_read_single.fq
+#another fastq file for single reads
+q=/path/**LIBNAMEA**/fastq2_read_single.fq
+#fasta file for single reads
+f=/path/**LIBNAMEA**/fasta1_read_single.fa
+#another fasta file for single reads
+f=/path/**LIBNAMEA**/fasta2_read_single.fa
+#a single fasta file for paired reads
+p=/path/**LIBNAMEA**/pairs1_in_one_file.fa
+#another single fasta file for paired reads
+p=/path/**LIBNAMEA**/pairs2_in_one_file.fa
+#bam file for single or paired reads, reads 1 in paired reads file should always be followed by reads 2
+#    NOTE: If a read in the bam file fails platform/vendor quality checks (the flag field 0x0200 is set), both it and its paired read will be ignored.
+b=/path/**LIBNAMEA**/reads1_in_file.bam
+#another bam file for single or paired reads
+b=/path/**LIBNAMEA**/reads2_in_file.bam
+[LIB]
+avg_ins=2000
+reverse_seq=1
+asm_flags=2
+rank=2
+# cutoff of pair number for a reliable connection (at least 5 for large insert size)
+pair_num_cutoff=5
+#minimum aligned length to contigs for a reliable read location (at least 35 for large insert size)
+map_len=35
+q1=/path/**LIBNAMEB**/fastq_read_1.fq
+q2=/path/**LIBNAMEB**/fastq_read_2.fq
+f1=/path/**LIBNAMEA**/fasta_read_1.fa
+f2=/path/**LIBNAMEA**/fasta_read_2.fa
+p=/path/**LIBNAMEA**/pairs_in_one_file.fa
+b=/path/**LIBNAMEA**/reads_in_file.bam


=====================================
fusion/check.c
=====================================
@@ -39,7 +39,7 @@ void *ckalloc(unsigned long long amount)
 
   if ((p = (void *) calloc( 1, (unsigned long long) amount)) == NULL && amount != 0)
     {
-      printf("not enought memory");
+      printf("Out of memory");
       fflush(stdout);
       exit(-1);
     }


=====================================
fusion/orderContig.c
=====================================
@@ -2701,6 +2701,7 @@ void outputLinks(FILE *fp, int insertS)
   CONNECT *cnts, *temp_cnt;
 
   //printf("outputLinks, %d contigs\n",num_ctg);
+  fprintf ( fp, "id contigID\tgapLen\tweight\tinsertS\n");
   for(i = 1; i <= num_ctg; i++)
     {
       cnts = contig_array[i].downwardConnect;


=====================================
sparsePregraph/Makefile
=====================================
@@ -1,6 +1,6 @@
 CC=            g++
 ifdef debug
-CFLAGS=         -O0 -g -fomit-frame-pointer
+CFLAGS=         -O0 -g -fomit-frame-pointer -DDEBUG
 else
 CFLAGS=         -O3 -fomit-frame-pointer -w
 endif
@@ -12,8 +12,8 @@ OBJS=		build_graph.o build_edge.o multi_threads.o \
 PROG=           
 INCLUDES=	-I./inc
 SUBDIRS=    . 
-LIBPATH=	-L/usr/lib64
-LIBS=       -pthread -lz -L./inc
+LIBPATH=	-L./inc -L/usr/lib64
+LIBS=       -pthread -lz
 EXTRA_FLAGS=
 VERSION =    1.0.3
 


=====================================
sparsePregraph/build_graph.cpp
=====================================
@@ -234,19 +234,20 @@ static void process_round1_threaded ( struct read_t *read, struct hashtable2 *ht
 
       pthread_spin_lock ( &locks[hash_idx[j]] );
       found[j] = look_up_in_a_list2_r1 ( &seq[j], ( bucket2_r1 ** * ) &bktptr[j] ); //lock...
-      pthread_spin_unlock ( &locks[hash_idx[j]] );
+      //pthread_spin_unlock ( &locks[hash_idx[j]] );
 
       if ( found[j] == 0 )
         {
-          pthread_spin_lock ( &locks[hash_idx[j]] );
+          //pthread_spin_lock ( &locks[hash_idx[j]] );
           * ( bktptr[j] ) = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2_r1 ) ); //lock ...
           memset ( * ( bktptr[j] ), 0, sizeof ( struct bucket2_r1 ) );
           memcpy ( & ( ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_t2.kmer ), & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
           ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_info.cov1 = 0;
           // the cvg is useless in round 1
-          pthread_spin_unlock ( &locks[hash_idx[j]] );
+          //pthread_spin_unlock ( &locks[hash_idx[j]] );
           ( *bucket_count ) ++;
         }
+	  pthread_spin_unlock(&locks[hash_idx[j]]);
 
       j = j + h;
 


=====================================
sparsePregraph/build_preArc.cpp
=====================================
@@ -94,10 +94,10 @@ void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file )
   size_t line_len, edge_len_left;
   int edge_len;
   int cvg;
-  bool bal_ed;//回文为0
+  int bal_ed;//回文为0
   const int BUFF_LEN = 1024;
   char line[BUFF_LEN];
-  char str[32];
+  //char str[32];
   char to_buff[BUFF_LEN];//buffer 2k edge seq  BUFF_LEN>4*K_size
   int processed = 0; //0表示未处理 1表示 处理完毕 2 表示已经处理了from_vertex ,to_vertex 还没有处理
   size_t edge_id = 0;
@@ -125,13 +125,13 @@ void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file )
             }
 
 #ifdef _63MER_
-          sscanf ( line + 7, "%d,%llx %llx ,%llx %llx ,%s %d,%d", &edge_len,
-                   & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+          sscanf ( line, ">length %d,%llx %llx,%llx %llx,cvg %d,%d", &edge_len,
+                   & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1],  &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
 #endif
 #ifdef _127MER_
-          sscanf ( line + 7, "%d,%llx %llx %llx %llx ,%llx %llx %llx %llx ,%s %d,%d", &edge_len,
+          sscanf ( line, ">length %d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d", &edge_len,
                    & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
-                   & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+                   & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3],  &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
 #endif
           edge_len_left = K_size + edge_len;
           processed = 0;
@@ -1311,7 +1311,7 @@ int put_path_2_buffer ( struct edge_path_buffer *buffer, unsigned int *path )
 
   memcpy ( ( buffer->path_buffer ) [pos], path, buffer->max_path_length * sizeof ( unsigned int ) );
 
-  for ( unsigned int i = 1; i < path[0]; i++ )
+  for ( unsigned int i = 1; i <= path[0]; i++ )
     {
       pthread_spin_lock ( ( buffer->locks ) + path[i] );
       ( ( buffer->mark_on_edge ) [path[i]] ) ++;


=====================================
sparsePregraph/inc/core.h
=====================================
@@ -106,7 +106,7 @@ inline void Init_HT2 ( struct hashtable2 *ht, size_t ht_sz )
 }
 
 
-inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 *** ptr )
+inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 **volatile* ptr )
 {
   while ( ( **ptr ) != NULL )
     {
@@ -121,7 +121,7 @@ inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 *** p
   return ( ( **ptr ) != NULL );
 }
 
-inline bool look_up_in_a_list2 ( struct kmer_t2 *seq, struct bucket2 *** ptr )
+inline bool look_up_in_a_list2 ( struct kmer_t2 *seq, struct bucket2 **volatile* ptr )
 {
   while ( ( **ptr ) != NULL )
     {


=====================================
standardPregraph/Makefile
=====================================
@@ -9,7 +9,7 @@ CC=             gcc
 GCCVERSIONMAJOR := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
 GCCVERSIONMINOR := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 4)
 ifdef debug
-CFLAGS=         -O0 -g -fomit-frame-pointer #-msse4.2
+CFLAGS=         -O0 -g -fomit-frame-pointer -DDEBUG#-msse4.2
 else
 CFLAGS=         -O3 -fomit-frame-pointer -w #-msse4.2
 endif
@@ -27,7 +27,7 @@ OBJS=		arc.o attachPEinfo.o bubble.o check.o compactEdge.o \
 PROG=       SOAPdenovo-63mer
 INCLUDES=   -Iinc
 SUBDIRS=    .
-LIBPATH=    -L/lib64 -L/usr/lib64 -L./inc
+LIBPATH=    -L./inc -L/lib64 -L/usr/lib64
 LIBS=       -pthread -lz -lm 
 EXTRA_FLAGS=
 


=====================================
standardPregraph/attachPEinfo.c
=====================================
@@ -409,7 +409,7 @@ int connectByPE_grad ( FILE *fp, int peGrad, char *line )
 
   if ( strlen ( line ) )
     {
-      sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+      sscanf ( line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos );
 
       if ( pre_readno <= minno )
         {
@@ -427,7 +427,7 @@ int connectByPE_grad ( FILE *fp, int peGrad, char *line )
 
   while ( fgets ( line, lineLen, fp ) != NULL )
     {
-      sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+      sscanf ( line, "%lld %u %d", &readno, &contigno, &pos );
 
       if ( readno > maxno )
         {
@@ -520,7 +520,7 @@ int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line )
 
   if ( strlen ( line ) )
     {
-      sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+      sscanf ( line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos );
 
       if ( pre_readno <= minno )
         {
@@ -538,7 +538,7 @@ int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line )
 
   while ( gzgets ( fp, line, lineLen ) != NULL )
     {
-      sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+      sscanf ( line, "%lld %u %d", &readno, &contigno, &pos );
 
       if ( readno > maxno )
         {


=====================================
standardPregraph/inc/global.h
=====================================
@@ -91,6 +91,7 @@ int initKmerSetSize = 0;   // init_size = (ubyte8) ((double) initKmerSetSize * 1
 long known_genome_size = 0;
 int smallKmer = 0;  // the kmer of the step "Map"
 int deltaKmer = 0;  // for map, K-k
+long contigTotalLength = 0 ;
 
 double cvg_low = 0.1;
 double cvg_high = 2;


=====================================
standardPregraph/inc/newhash.h
=====================================
@@ -28,6 +28,7 @@
 #endif
 
 #define MAX_KMER_COV 63
+#define MAX_KMER_COV_ALL 255
 #define EDGE_BIT_SIZE 6 //6 bit for each edge
 #define EDGE_XOR_MASK 0x3FU
 #define LINKS_BITS 0x00FFFFFFU
@@ -35,8 +36,10 @@
 #define get_kmer_seq(mer) ((mer).seq)
 #define set_kmer_seq(mer, val) ((mer).seq = val)
 
-#define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
-#define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+//#define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+//#define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+#define get_kmer_left_cov(mer, idx) (((mer).l_links.l_links.l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+#define set_kmer_left_cov(mer, idx, val) ((mer).l_links.l_links.l_links = (((mer).l_links.l_links.l_links)&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
 #define get_kmer_left_covs(mer) (get_kmer_left_cov(mer, 0) + get_kmer_left_cov(mer, 1) + get_kmer_left_cov(mer, 2) + get_kmer_left_cov(mer, 3))
 
 #define get_kmer_right_cov(mer, idx) (((mer).r_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
@@ -52,6 +55,15 @@
 #define clear_kmer_entity_del(flags, idx)  ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1)))
 #define exists_kmer_entity(flags, idx)     (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03))
 
+#define set_kmer_edge_id(mer, idx)  (((mer).l_links.edgeId)=(idx))
+#define get_kmer_edge_id(mer)  ((mer).l_links.edgeId)
+#define set_kmer_contig_id(mer , idx)  (((mer).l_links.contigId) = (idx))
+#define get_kmer_contig_id(mer)  ((mer).l_links.contigId)
+#define set_kmer_cov(mer, idx) (((mer).l_links.l_links.covs) = ((idx) & 0xff))
+#define get_kmer_cov(mer) ((mer).l_links.l_links.covs)
+
+
+
 #ifdef MER127
 typedef __uint128_t u128b;
 
@@ -62,11 +74,24 @@ typedef struct u256b
 } U256b;
 #else
 #endif
+typedef struct  l_link_pack
+{
+    ubyte4 l_links :  4 * EDGE_BIT_SIZE;
+    ubyte4 covs : 8;
+} L_link_pack;
+ 
+typedef union l_link_union
+{
+    L_link_pack l_links;
+    ubyte4 edgeId;
+    ubyte4 contigId;
+} L_link_union;
 
 typedef struct kmer_st
 {
   Kmer seq;       //kmer set
-  ubyte4 l_links;                     // sever as edgeID since make_edge
+  L_link_union l_links;
+  //ubyte4 l_links;                     // sever as edgeID since make_edge
   ubyte4 r_links: 4 * EDGE_BIT_SIZE;
   ubyte4 linear: 1;
   ubyte4 deleted: 1;


=====================================
standardPregraph/iterate.c
=====================================
@@ -25,7 +25,9 @@
 #include "kmerhash.h"
 #include "extfunc.h"
 #include "extvab.h"
-
+#if DEBUG
+#define inline
+#endif
 static Kmer *kmerBuffer;
 static ubyte8 *hashBanBuffer;
 
@@ -1060,7 +1062,7 @@ void addEdge ( unsigned int from, unsigned int to, char ch, int bal_edge, unsign
   if ( num_ed_temp + 1 > num_ed_limit )
     {
       unsigned int new_num_ed = num_ed_limit * 1.2;
-      edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 1 ) * sizeof ( EDGE ), ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+      edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 3 ) * sizeof ( EDGE ), ( num_ed_limit + 3 ) * sizeof ( EDGE ) );
       num_ed_limit = new_num_ed;
       int j;
 
@@ -1888,11 +1890,11 @@ static int cmp_seq ( const void *a, const void *b )
                 }
             }
 
-          if ( i == A->length && i < B->length )
+          if ( ( A->length == 0 ) || ( i == A->length && i < B->length ) )
             {
               return -1;
             }
-          else if ( i < A->length && i ==  B->length )
+          else if ( ( B->length == 0 ) || ( i < A->length && i ==  B->length ) )
             {
               return 1;
             }
@@ -2731,4 +2733,3 @@ void Iterate ( char *libfile, char *graph, int maxk, int M )   //boolean keepRea
   fprintf ( stderr, "Iteration finished.\n" );
   fprintf ( stderr, "Time spent on iteration: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
 }
-


=====================================
standardPregraph/loadPreGraph.c
=====================================
@@ -457,7 +457,7 @@ void loadEdge ( char *graphfile )
   sprintf ( name, "%s.edge.gz", graphfile );
   fp = gzopen ( name, "r" );
   num_ed_limit = 1.2 * num_ed;
-  edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+  edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 3 ) * sizeof ( EDGE ) );
 
   for ( j = num_ed + 1; j <= num_ed_limit; j++ )
     {


=====================================
standardPregraph/map.c
=====================================
@@ -29,7 +29,7 @@
 static void initenv ( int argc, char **argv );
 static char shortrdsfile[256];
 static char graphfile[256];
-
+extern long contigTotalLength;
 static void display_map_usage ();
 
 /*************************************************
@@ -159,7 +159,7 @@ void initenv ( int argc, char **argv )
   inpseq = outseq = 0;
   fprintf ( stderr, "Parameters: map " );
 
-  while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:f" ) ) != EOF )
+  while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:h:f" ) ) != EOF )
     {
       //printf("get option\n");
       switch ( copt )
@@ -194,6 +194,12 @@ void initenv ( int argc, char **argv )
           smallKmer = atoi ( temp );
           break;
 
+        case 'h':
+          fprintf ( stderr, "-h %s ", optarg );
+          sscanf ( optarg, "%s", temp );
+          contigTotalLength = atol( temp );
+          break;
+
         case 'f':
           fill = 1;
           fprintf ( stderr, "-f " );
@@ -220,9 +226,10 @@ void initenv ( int argc, char **argv )
 
 static void display_map_usage ()
 {
-  fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C]\n" );
+  fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C] [-h contig_total_length]\n" );
   fprintf ( stderr, "  -s <string>        configFile: the config file of solexa reads\n" );
   fprintf ( stderr, "  -g <string>        inputGraph: prefix of input graph file names\n" );
+  fprintf ( stderr, "  -h (optional)      total length of contigs for init hash table. [1024]\n" ); 
   fprintf ( stderr, "  -f (optional)      output gap related reads in map step for using SRkgf to fill gap, [NO]\n" );
   fprintf ( stderr, "  -p <int>           n_cpu: number of cpu for use, [8]\n" );
 #ifdef MER127


=====================================
standardPregraph/newhash.c
=====================================
@@ -74,6 +74,7 @@ Return:
 static inline void update_kmer ( kmer_t *mer, ubyte left, ubyte right )
 {
   ubyte4 cov;
+  ubyte4 cov_all;
 
   if ( left < 4 )
     {
@@ -94,6 +95,14 @@ static inline void update_kmer ( kmer_t *mer, ubyte left, ubyte right )
           set_kmer_right_cov ( *mer, right, cov + 1 );
         }
     }
+    if ( left < 4 || right < 4 )
+    {
+       cov_all = get_kmer_cov( *mer );
+       if(cov_all < MAX_KMER_COV_ALL)
+       {
+           set_kmer_cov(*mer,cov_all+1);
+       }
+    }
 }
 
 /*************************************************
@@ -113,7 +122,9 @@ Return:
 *************************************************/
 static inline void set_new_kmer ( kmer_t *mer, Kmer seq, ubyte left, ubyte right )
 {
-  *mer = empty_kmer;
+  // *mer = empty_kmer;
+  memset(mer,0,sizeof(kmer_t));
+  mer->single=1;
   set_kmer_seq ( *mer, seq );
 
   if ( left < 4 )
@@ -125,6 +136,7 @@ static inline void set_new_kmer ( kmer_t *mer, Kmer seq, ubyte left, ubyte right
     {
       set_kmer_right_cov ( *mer, right, 1 );
     }
+    set_kmer_cov(*mer,1);
 }
 
 static inline int is_prime_kh ( ubyte8 num )


=====================================
standardPregraph/node2edge.c
=====================================
@@ -513,7 +513,7 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
               */
             }
 
-          longNode->l_links = edge_c;
+          set_kmer_edge_id(*longNode ,edge_c);
           longNode->twin = ( unsigned char ) ( bal_edge + 1 );
         }
       else
@@ -536,7 +536,7 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
               */
             }
 
-          longNode->l_links = edge_c + bal_edge;
+          set_kmer_edge_id(*longNode, edge_c + bal_edge);
           longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
         }
     }
@@ -570,12 +570,12 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
 
       if ( temp->isSmaller )
         {
-          del_node->l_links = edge_c;
+          set_kmer_edge_id(*del_node, edge_c);
           del_node->twin = ( unsigned char ) ( bal_edge + 1 );
         }
       else
         {
-          del_node->l_links = edge_c + bal_edge;
+          set_kmer_edge_id(*del_node, edge_c + bal_edge);
           del_node->twin = ( unsigned char ) ( -bal_edge + 1 );
         }
     }


=====================================
standardPregraph/orderContig.c
=====================================
@@ -17,11 +17,11 @@
 #define GapUpperBound 300000
 
 #define MaxCntNode 1000
-
+/*
 static int DEBUG = 0;
 static int DEBUG1 = 0;
 static int DEBUG2 = 0;
-
+*/
 static int bySmall = 1;
 
 static boolean static_f = 0;
@@ -5497,6 +5497,7 @@ static void outputLinks ( FILE *fp, int insertS )
   unsigned int i, bal_ctg, bal_toCtg;
   CONNECT *cnts, *temp_cnt;
 
+  fprintf ( fp, "id contigID\tgapLen\tweight\tinsertS\n");
   for ( i = 1; i <= num_ctg; i++ )
     {
       cnts = contig_array[i].downwardConnect;


=====================================
standardPregraph/prlHashCtg.c
=====================================
@@ -28,7 +28,7 @@
 
 //debugging variables
 static long long *kmerCounter;
-
+extern long contigTotalLength;
 //buffer related varibles for chop kmer
 static unsigned int read_c;
 static char **rcSeq;
@@ -143,7 +143,8 @@ static void singleKmer ( int t, KmerSet *kset, unsigned int seq_index, unsigned
           node->twin = 1;
         };
 
-      node->l_links = ctgIdArray[seq_index];
+      //node->l_links = ctgIdArray[seq_index];
+      set_kmer_contig_id(*node ,ctgIdArray[seq_index]);
 
       node->r_links = pos;
     }
@@ -365,9 +366,16 @@ boolean prlContig2nodes ( char *grapfile, int len_cut )
       kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
       KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
 
+
+      long initHashSize = 1024;
+      float loadFactor = 0.77f;
+      if( contigTotalLength != 0 )
+      {
+          initHashSize = contigTotalLength / thrd_num /loadFactor ;
+      }
       for ( i = 0; i < thrd_num; i++ )
         {
-          KmerSets[i] = init_kmerset ( 1024, 0.77f );
+          KmerSets[i] = init_kmerset ( initHashSize, loadFactor );
           thrdSignal[i + 1] = 0;
           paras[i].threadID = i;
           paras[i].mainSignal = &thrdSignal[0];


=====================================
standardPregraph/prlHashReads.c
=====================================
@@ -483,7 +483,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
                   if ( turn == 1 )
                     {
                       turn = 2;
-                      readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+                      if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo ) )
+                      {
+                          fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+                          *(readBuffer1+offset1)='\0';
+                          fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start1);
+                          exit(-1);
+                      }
 
                       if ( ( ++i ) % 100000000 == 0 )
                         {
@@ -532,7 +538,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
                   if ( turn == 2 )
                     {
                       turn = 1;
-                      readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+                      if( !readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo ) )
+                      {
+                          fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+                          *(readBuffer2+offset2)='\0';
+                          fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer2+start2);
+                          exit(-1);
+                      }
 
                       if ( ( ++i ) % 100000000 == 0 )
                         {
@@ -608,7 +620,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
 
               while ( start < offset )
                 {
-                  readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+                  if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo ) )
+                  {
+                      fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+                      *(readBuffer1+offset)='\0';
+                      fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start);
+                      exit(-1);
+                  }
 
                   if ( ( ++i ) % 100000000 == 0 )
                     {
@@ -851,9 +869,15 @@ int AIORead ( struct aiocb *mycb, int *offset, char *buf, char *cach, int *rt, i
                   *rt = aio_read ( mycb );
                   return 1;
                 }
+              else if ( 0 == get )
+              {
+                  fprintf(stderr, "Warning : aio_return zero, the size of input file must be N * 32768.\n");
+                  return 2;
+              }
               else
                 {
-                  fprintf(stderr, "Error: aio_return error.\n");
+                  fprintf(stderr, "Error: aio_return %d, error happened.\n",get);
+                  exit(-1);
                 }
 
               /*else
@@ -956,7 +980,8 @@ static void thread_delow ( KmerSet *set, unsigned char thrdID )
                 }
             }
 
-          if ( rs->l_links == 0 && rs->r_links == 0 )
+            //if ( rs->l_links == 0 && rs->r_links == 0 )
+            if ( get_kmer_left_covs(*rs) ==0 && get_kmer_right_covs(*rs) == 0 )
             {
               rs->deleted = 1;
               tips[thrdID]++;
@@ -1027,14 +1052,16 @@ static void thread_mark ( KmerSet *set, unsigned char thrdID )
 
           if ( rs->single )
             {
-              kmerFreq[thrdID][1]++;
+              //kmerFreq[thrdID][1]++;
               counter++;
             }
+          /*
           else
             {
               kmerFreq[thrdID][ ( l_cvg > r_cvg ? l_cvg : r_cvg )]++;
             }
-
+          */
+          kmerFreq[thrdID][get_kmer_cov(*rs)] ++ ; 
           if ( in_num == 1 && out_num == 1 )
             {
               rs->linear = 1;


=====================================
standardPregraph/prlRead2Ctg.c
=====================================
@@ -298,7 +298,7 @@ static void parse1read ( int t )
               continue;
             }
 
-          if ( nodeBuffer[s]->l_links == node->l_links )
+          if ( get_kmer_contig_id(*nodeBuffer[s]) == get_kmer_contig_id(*node) )
             {
               flag++;
               nodeBuffer[s] = NULL;
@@ -342,7 +342,7 @@ static void parse1read ( int t )
   i = pos - start + 1;
   node = nodeBuffer[j];
   isSmaller = smallerBuffer[j];
-  contigID = node->l_links;
+  contigID = get_kmer_contig_id(*node);
   ctgLen = contig_array[contigID].length;
   pos = node->r_links;
 
@@ -407,7 +407,7 @@ static void locate1read ( int t )
 
       i = j - start + 1;
       isSmaller = smallerBuffer[j];
-      contigID = node->l_links;
+      contigID = get_kmer_contig_id(*node);
       ctgLen = contig_array[contigID].length;
       pos = node->r_links;
 


=====================================
standardPregraph/prlRead2path.c
=====================================
@@ -574,22 +574,22 @@ static void search1kmerPlus ( int j, unsigned char thrdID )
 
   if ( smallerBuffer[j] )
     {
-      mixBuffer[j].low2 = node->l_links;
+      mixBuffer[j].low2 = get_kmer_contig_id(*node);
     }
   else
     {
-      mixBuffer[j].low2 = node->l_links + node->twin - 1;
+      mixBuffer[j].low2 = get_kmer_contig_id(*node) + node->twin - 1;
     }
 
 #else
 
   if ( smallerBuffer[j] )
     {
-      mixBuffer[j].low = node->l_links;
+      mixBuffer[j].low = get_kmer_contig_id(*node);;
     }
   else
     {
-      mixBuffer[j].low = node->l_links + node->twin - 1;
+      mixBuffer[j].low = get_kmer_contig_id(*node) + node->twin - 1;
     }
 
 #endif
@@ -635,11 +635,11 @@ static void parse1read ( int t, int threadID )
         {
           if ( isSmaller )
             {
-              edge_index = node->l_links;
+              edge_index = get_kmer_contig_id(*node);
             }
           else
             {
-              edge_index = node->l_links + node->twin - 1;
+              edge_index = get_kmer_contig_id(*node) + node->twin - 1;
             }
 
 #ifdef MER127
@@ -1001,7 +1001,13 @@ void prlRead2edge ( char *libfile, char *outfile )
                   if ( turn == 1 )
                     {
                       turn = 2;
-                      readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+                     if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo ) )
+                      {
+                          fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+                          *(readBuffer1+offset1)='\0';
+                          fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start1);
+                          exit(-1);
+                      }
 
                       if ( ( ++i ) % 100000000 == 0 )
                         {
@@ -1078,7 +1084,13 @@ void prlRead2edge ( char *libfile, char *outfile )
                   if ( turn == 2 )
                     {
                       turn = 1;
-                      readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+                      if( !readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo ) )
+                      {
+                          fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+                          *(readBuffer2+offset2)='\0';
+                          fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer2+start2);
+                          exit(-1);
+                      }
 
                       if ( ( ++i ) % 100000000 == 0 )
                         {
@@ -1181,7 +1193,14 @@ void prlRead2edge ( char *libfile, char *outfile )
 
               while ( start < offset )
                 {
-                  readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+                  if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo ) )
+                  {
+                      fprintf ( stderr, "readseqInLib return error! please makesure input file is correct fastq/fasta file \n");
+                      *(readBuffer1+offset)='\0';
+                      fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start);
+                      exit(-1);
+                  }
+
 
                   if ( ( ++i ) % 100000000 == 0 )
                     {


=====================================
standardPregraph/read2edge.c
=====================================
@@ -25,6 +25,9 @@
 #include "newhash.h"
 #include <extfunc.h>
 #include <extvab.h>
+#if DEBUG
+#define inline 
+#endif
 
 static pthread_mutex_t mutex_arc;
 


=====================================
standardPregraph/readseq1by1.c
=====================================
@@ -931,6 +931,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
       if ( lib_array[i].paired == 1 )
         {
           readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+          if( *len_seq < 1 )
+          {
+            return 0;
+          }
 
           if ( lib_array[i].reverse )
             {
@@ -945,6 +949,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
         {
           readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
 
+          if( *len_seq < 1 )
+          {
+            return 0;
+          }
           if ( lib_array[i].reverse )
             {
               reverse2k ( src_seq, *len_seq );
@@ -961,6 +969,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
       if ( lib_array[i].paired == 1 )
         {
           readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+          if( *len_seq < 1 )
+          {
+            return 0;
+          }
 
           if ( lib_array[i].reverse )
             {
@@ -974,6 +986,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
       else
         {
           readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+          if( *len_seq < 1 )
+          {
+            return 0;
+          }
 
           if ( lib_array[i].reverse )
             {
@@ -989,10 +1005,18 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
   if ( lib_array[i].curr_type == 6 )
     {
       readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+      if( *len_seq < 1 )
+      {
+          return 0;
+      }
     }
   else
     {
       readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+      if( *len_seq < 1 )
+      {
+          return 0;
+      }
     }
 
   /*



View it on GitLab: https://salsa.debian.org/med-team/soapdenovo2/-/commit/2b05218e222c4bc191a008ab8ae3f10820e4c8cc

-- 
View it on GitLab: https://salsa.debian.org/med-team/soapdenovo2/-/commit/2b05218e222c4bc191a008ab8ae3f10820e4c8cc
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201027/607d7d6e/attachment-0001.html>


More information about the debian-med-commit mailing list