[med-svn] [Git][med-team/soapdenovo2][upstream] New upstream version 242+dfsg
Nilesh Patra
gitlab at salsa.debian.org
Tue Oct 27 14:54:03 GMT 2020
Nilesh Patra pushed to branch upstream at Debian Med / soapdenovo2
Commits:
2b05218e by Nilesh Patra at 2020-10-27T19:30:23+05:30
New upstream version 242+dfsg
- - - - -
26 changed files:
- + .travis.yml
- Makefile
- README.md
- + example.config
- fusion/check.c
- fusion/orderContig.c
- sparsePregraph/Makefile
- sparsePregraph/build_graph.cpp
- sparsePregraph/build_preArc.cpp
- sparsePregraph/inc/core.h
- standardPregraph/Makefile
- standardPregraph/attachPEinfo.c
- standardPregraph/inc/global.h
- standardPregraph/inc/newhash.h
- standardPregraph/iterate.c
- standardPregraph/loadPreGraph.c
- standardPregraph/map.c
- standardPregraph/newhash.c
- standardPregraph/node2edge.c
- standardPregraph/orderContig.c
- standardPregraph/prlHashCtg.c
- standardPregraph/prlHashReads.c
- standardPregraph/prlRead2Ctg.c
- standardPregraph/prlRead2path.c
- standardPregraph/read2edge.c
- standardPregraph/readseq1by1.c
Changes:
=====================================
.travis.yml
=====================================
@@ -0,0 +1,8 @@
+language: cpp
+
+compiler:
+ - gcc
+
+
+install: true
+script: make
\ No newline at end of file
=====================================
Makefile
=====================================
@@ -1,7 +1,7 @@
MAKEFLAGS += --no-print-directory
CC = g++
ifdef debug
-CFLAGS= -O0 -g -fomit-frame-pointer
+CFLAGS= -O0 -g -fomit-frame-pointer -DDEBUG
else
CFLAGS= -O3 -fomit-frame-pointer
endif
@@ -10,7 +10,7 @@ SUBDIRS = sparsePregraph standardPregraph fusion
PROG= SOAPdenovo-63mer SOAPdenovo-127mer SOAPdenovo-fusion
INCLUDES= -I./sparsePregraph/inc -I./standardPregraph/inc
-LIBPATH= -L/lib64 -L/usr/lib64 -L./sparsePregraph/inc -L./standardPregraph/inc
+LIBPATH= -L./sparsePregraph/inc -L./standardPregraph/inc -L/lib64 -L/usr/lib64
LIBS= -pthread -lz -lm
EXTRA_FLAGS=
@@ -44,21 +44,21 @@ SOAPdenovo-fusion:
ifdef debug
SOAPdenovo-63mer:
- @cd sparsePregraph;make 63mer=1 debug=1;cd ..;
- @cd standardPregraph;make 63mer=1 debug=1;cd ..;
+ @cd sparsePregraph;make 63mer=1 debug=1 clean all;cd ..;
+ @cd standardPregraph;make 63mer=1 debug=1 clean all;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-63mer
SOAPdenovo-127mer:
- @cd sparsePregraph;make 127mer=1 debug=1;cd ..;
- @cd standardPregraph;make 127mer=1 debug=1;cd ..;
+ @cd sparsePregraph;make 127mer=1 debug=1 clean all;cd ..;
+ @cd standardPregraph;make 127mer=1 debug=1 clean all;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
else
SOAPdenovo-63mer:
- @cd sparsePregraph;make 63mer=1;cd ..;
- @cd standardPregraph;make 63mer=1;cd ..;
+ @cd sparsePregraph;make 63mer=1 clean all;cd ..;
+ @cd standardPregraph;make 63mer=1 clean all;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-63mer
SOAPdenovo-127mer:
- @cd sparsePregraph;make 127mer=1;cd ..;
- @cd standardPregraph;make 127mer=1;cd ..;
+ @cd sparsePregraph;make 127mer=1 clean all;cd ..;
+ @cd standardPregraph;make 127mer=1 clean all;cd ..;
@$(CC) sparsePregraph/*.o standardPregraph/*.o $(LIBPATH) $(LIBS) $(EXTRA_FLAGS) -o SOAPdenovo-127mer
endif
=====================================
README.md
=====================================
@@ -1,3 +1,5 @@
+![Build Status](https://www.travis-ci.org/cchd0001/SOAPdenovo2.svg?branch=master)
+
# Manual of SOAPdenovo2
## About MEGAHIT
@@ -16,7 +18,7 @@ MEGAHIT: An ultra-fast single-node solution for large and complex metagenomics a
## For MAC users
-Please use <a href="http://brew.sh">brew</a> to install SOAPdenovo. SOAPdenovo's package in Homebrew-science is managed by Shaun Jackman.
+Please use <a href="http://brew.sh">brew</a> to install SOAPdenovo. SOAPdenovo's package in Homebrew-science is managed by Shaun Jackman.
## Introduction
@@ -28,7 +30,7 @@ SOAPdenovo aims for large plant and animal genomes, although it also works well
## Installation
1. You can download the pre-compiled binary according to your platform, unpack using "tar -zxf ${destination folder} download.tgz" and execute directly.
-2. Or download the source code, unpack to ${destination folder} with the method above, and compile by using GNU make with command "make" at ${destination folder}/SOAPdenovo-V2.04. Then install executable to ${destination folder}/SOAPdenovo-V2.04/bin using "make install"
+2. Or download the source code, unpack to ${destination folder} with the method above, and compile by using GNU make with command "make" at ${destination folder}/SOAPdenovo-V2.04.
## How to use it
=====================================
example.config
=====================================
@@ -0,0 +1,61 @@
+#maximal read length
+max_rd_len=100
+[LIB]
+#average insert size
+avg_ins=200
+#if sequence needs to be reversed
+reverse_seq=0
+#in which part(s) the reads are used
+asm_flags=3
+#use only first 100 bps of each read
+rd_len_cutoff=100
+#in which order the reads are used while scaffolding
+rank=1
+# cutoff of pair number for a reliable connection (at least 3 for short insert size)
+pair_num_cutoff=3
+#minimum aligned length to contigs for a reliable read location (at least 32 for short insert size)
+map_len=32
+#a pair of fastq files, read 1 file should always be followed by read 2 file
+q1=/path/**LIBNAMEA**/fastq1_read_1.fq
+q2=/path/**LIBNAMEA**/fastq1_read_2.fq
+#another pair of fastq files, read 1 file should always be followed by read 2 file
+q1=/path/**LIBNAMEA**/fastq2_read_1.fq
+q2=/path/**LIBNAMEA**/fastq2_read_2.fq
+#a pair of fasta files, read 1 file should always be followed by read 2 file
+f1=/path/**LIBNAMEA**/fasta1_read_1.fa
+f2=/path/**LIBNAMEA**/fasta1_read_2.fa
+#another pair of fasta files, read 1 file should always be followed by read 2 file
+f1=/path/**LIBNAMEA**/fasta2_read_1.fa
+f2=/path/**LIBNAMEA**/fasta2_read_2.fa
+#fastq file for single reads
+q=/path/**LIBNAMEA**/fastq1_read_single.fq
+#another fastq file for single reads
+q=/path/**LIBNAMEA**/fastq2_read_single.fq
+#fasta file for single reads
+f=/path/**LIBNAMEA**/fasta1_read_single.fa
+#another fasta file for single reads
+f=/path/**LIBNAMEA**/fasta2_read_single.fa
+#a single fasta file for paired reads
+p=/path/**LIBNAMEA**/pairs1_in_one_file.fa
+#another single fasta file for paired reads
+p=/path/**LIBNAMEA**/pairs2_in_one_file.fa
+#bam file for single or paired reads, reads 1 in paired reads file should always be followed by reads 2
+# NOTE: If a read in the bam file fails platform/vendor quality checks (the flag field 0x0200 is set), both it and its paired read are ignored.
+b=/path/**LIBNAMEA**/reads1_in_file.bam
+#another bam file for single or paired reads
+b=/path/**LIBNAMEA**/reads2_in_file.bam
+[LIB]
+avg_ins=2000
+reverse_seq=1
+asm_flags=2
+rank=2
+# cutoff of pair number for a reliable connection (at least 5 for large insert size)
+pair_num_cutoff=5
+#minimum aligned length to contigs for a reliable read location (at least 35 for large insert size)
+map_len=35
+q1=/path/**LIBNAMEB**/fastq_read_1.fq
+q2=/path/**LIBNAMEB**/fastq_read_2.fq
+f1=/path/**LIBNAMEA**/fasta_read_1.fa
+f2=/path/**LIBNAMEA**/fasta_read_2.fa
+p=/path/**LIBNAMEA**/pairs_in_one_file.fa
+b=/path/**LIBNAMEA**/reads_in_file.bam
=====================================
fusion/check.c
=====================================
@@ -39,7 +39,7 @@ void *ckalloc(unsigned long long amount)
if ((p = (void *) calloc( 1, (unsigned long long) amount)) == NULL && amount != 0)
{
- printf("not enought memory");
+ printf("Out of memory");
fflush(stdout);
exit(-1);
}
=====================================
fusion/orderContig.c
=====================================
@@ -2701,6 +2701,7 @@ void outputLinks(FILE *fp, int insertS)
CONNECT *cnts, *temp_cnt;
//printf("outputLinks, %d contigs\n",num_ctg);
+ fprintf ( fp, "id contigID\tgapLen\tweight\tinsertS\n");
for(i = 1; i <= num_ctg; i++)
{
cnts = contig_array[i].downwardConnect;
=====================================
sparsePregraph/Makefile
=====================================
@@ -1,6 +1,6 @@
CC= g++
ifdef debug
-CFLAGS= -O0 -g -fomit-frame-pointer
+CFLAGS= -O0 -g -fomit-frame-pointer -DDEBUG
else
CFLAGS= -O3 -fomit-frame-pointer -w
endif
@@ -12,8 +12,8 @@ OBJS= build_graph.o build_edge.o multi_threads.o \
PROG=
INCLUDES= -I./inc
SUBDIRS= .
-LIBPATH= -L/usr/lib64
-LIBS= -pthread -lz -L./inc
+LIBPATH= -L./inc -L/usr/lib64
+LIBS= -pthread -lz
EXTRA_FLAGS=
VERSION = 1.0.3
=====================================
sparsePregraph/build_graph.cpp
=====================================
@@ -234,19 +234,20 @@ static void process_round1_threaded ( struct read_t *read, struct hashtable2 *ht
pthread_spin_lock ( &locks[hash_idx[j]] );
found[j] = look_up_in_a_list2_r1 ( &seq[j], ( bucket2_r1 ** * ) &bktptr[j] ); //lock...
- pthread_spin_unlock ( &locks[hash_idx[j]] );
+ //pthread_spin_unlock ( &locks[hash_idx[j]] );
if ( found[j] == 0 )
{
- pthread_spin_lock ( &locks[hash_idx[j]] );
+ //pthread_spin_lock ( &locks[hash_idx[j]] );
* ( bktptr[j] ) = ( struct bucket2 * ) malloc ( sizeof ( struct bucket2_r1 ) ); //lock ...
memset ( * ( bktptr[j] ), 0, sizeof ( struct bucket2_r1 ) );
memcpy ( & ( ( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_t2.kmer ), & ( seq[j].kmer ), Kmer_arr_sz * sizeof ( uint64_t ) );
( ( struct bucket2_r1 * ) * ( bktptr[j] ) )->kmer_info.cov1 = 0;
// the cvg is useless in round 1
- pthread_spin_unlock ( &locks[hash_idx[j]] );
+ //pthread_spin_unlock ( &locks[hash_idx[j]] );
( *bucket_count ) ++;
}
+ pthread_spin_unlock(&locks[hash_idx[j]]);
j = j + h;
=====================================
sparsePregraph/build_preArc.cpp
=====================================
@@ -94,10 +94,10 @@ void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file )
size_t line_len, edge_len_left;
int edge_len;
int cvg;
- bool bal_ed;//回文为0
+ int bal_ed;//回文为0
const int BUFF_LEN = 1024;
char line[BUFF_LEN];
- char str[32];
+ //char str[32];
char to_buff[BUFF_LEN];//buffer 2k edge seq BUFF_LEN>4*K_size
int processed = 0; //0表示未处理 1表示 处理完毕 2 表示已经处理了from_vertex ,to_vertex 还没有处理
size_t edge_id = 0;
@@ -125,13 +125,13 @@ void build_vertexes ( vertex_hash2 *v_ht, int K_size, char *edge_file )
}
#ifdef _63MER_
- sscanf ( line + 7, "%d,%llx %llx ,%llx %llx ,%s %d,%d", &edge_len,
- & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ sscanf ( line, ">length %d,%llx %llx,%llx %llx,cvg %d,%d", &edge_len,
+ & ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
#ifdef _127MER_
- sscanf ( line + 7, "%d,%llx %llx %llx %llx ,%llx %llx %llx %llx ,%s %d,%d", &edge_len,
+ sscanf ( line, ">length %d,%llx %llx %llx %llx,%llx %llx %llx %llx,cvg %d,%d", &edge_len,
& ( from_kmer.kmer ) [0], & ( from_kmer.kmer ) [1], & ( from_kmer.kmer ) [2], & ( from_kmer.kmer ) [3],
- & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], str, &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
+ & ( to_kmer.kmer ) [0], & ( to_kmer.kmer ) [1], & ( to_kmer.kmer ) [2], & ( to_kmer.kmer ) [3], &cvg, &bal_ed ); // from_kmer to_kmer is of no use here
#endif
edge_len_left = K_size + edge_len;
processed = 0;
@@ -1311,7 +1311,7 @@ int put_path_2_buffer ( struct edge_path_buffer *buffer, unsigned int *path )
memcpy ( ( buffer->path_buffer ) [pos], path, buffer->max_path_length * sizeof ( unsigned int ) );
- for ( unsigned int i = 1; i < path[0]; i++ )
+ for ( unsigned int i = 1; i <= path[0]; i++ )
{
pthread_spin_lock ( ( buffer->locks ) + path[i] );
( ( buffer->mark_on_edge ) [path[i]] ) ++;
=====================================
sparsePregraph/inc/core.h
=====================================
@@ -106,7 +106,7 @@ inline void Init_HT2 ( struct hashtable2 *ht, size_t ht_sz )
}
-inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 *** ptr )
+inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 **volatile* ptr )
{
while ( ( **ptr ) != NULL )
{
@@ -121,7 +121,7 @@ inline bool look_up_in_a_list2_r1 ( struct kmer_t2 *seq, struct bucket2_r1 *** p
return ( ( **ptr ) != NULL );
}
-inline bool look_up_in_a_list2 ( struct kmer_t2 *seq, struct bucket2 *** ptr )
+inline bool look_up_in_a_list2 ( struct kmer_t2 *seq, struct bucket2 **volatile* ptr )
{
while ( ( **ptr ) != NULL )
{
=====================================
standardPregraph/Makefile
=====================================
@@ -9,7 +9,7 @@ CC= gcc
GCCVERSIONMAJOR := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONMINOR := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 4)
ifdef debug
-CFLAGS= -O0 -g -fomit-frame-pointer #-msse4.2
+CFLAGS= -O0 -g -fomit-frame-pointer -DDEBUG#-msse4.2
else
CFLAGS= -O3 -fomit-frame-pointer -w #-msse4.2
endif
@@ -27,7 +27,7 @@ OBJS= arc.o attachPEinfo.o bubble.o check.o compactEdge.o \
PROG= SOAPdenovo-63mer
INCLUDES= -Iinc
SUBDIRS= .
-LIBPATH= -L/lib64 -L/usr/lib64 -L./inc
+LIBPATH= -L./inc -L/lib64 -L/usr/lib64
LIBS= -pthread -lz -lm
EXTRA_FLAGS=
=====================================
standardPregraph/attachPEinfo.c
=====================================
@@ -409,7 +409,7 @@ int connectByPE_grad ( FILE *fp, int peGrad, char *line )
if ( strlen ( line ) )
{
- sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+ sscanf ( line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos );
if ( pre_readno <= minno )
{
@@ -427,7 +427,7 @@ int connectByPE_grad ( FILE *fp, int peGrad, char *line )
while ( fgets ( line, lineLen, fp ) != NULL )
{
- sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+ sscanf ( line, "%lld %u %d", &readno, &contigno, &pos );
if ( readno > maxno )
{
@@ -520,7 +520,7 @@ int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line )
if ( strlen ( line ) )
{
- sscanf ( line, "%lld %d %d", &pre_readno, &pre_contigno, &pre_pos );
+ sscanf ( line, "%lld %u %d", &pre_readno, &pre_contigno, &pre_pos );
if ( pre_readno <= minno )
{
@@ -538,7 +538,7 @@ int connectByPE_grad_gz ( gzFile *fp, int peGrad, char *line )
while ( gzgets ( fp, line, lineLen ) != NULL )
{
- sscanf ( line, "%lld %d %d", &readno, &contigno, &pos );
+ sscanf ( line, "%lld %u %d", &readno, &contigno, &pos );
if ( readno > maxno )
{
=====================================
standardPregraph/inc/global.h
=====================================
@@ -91,6 +91,7 @@ int initKmerSetSize = 0; // init_size = (ubyte8) ((double) initKmerSetSize * 1
long known_genome_size = 0;
int smallKmer = 0; // the kmer of the step "Map"
int deltaKmer = 0; // for map, K-k
+long contigTotalLength = 0 ;
double cvg_low = 0.1;
double cvg_high = 2;
=====================================
standardPregraph/inc/newhash.h
=====================================
@@ -28,6 +28,7 @@
#endif
#define MAX_KMER_COV 63
+#define MAX_KMER_COV_ALL 255
#define EDGE_BIT_SIZE 6 //6 bit for each edge
#define EDGE_XOR_MASK 0x3FU
#define LINKS_BITS 0x00FFFFFFU
@@ -35,8 +36,10 @@
#define get_kmer_seq(mer) ((mer).seq)
#define set_kmer_seq(mer, val) ((mer).seq = val)
-#define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
-#define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+//#define get_kmer_left_cov(mer, idx) (((mer).l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+//#define set_kmer_left_cov(mer, idx, val) ((mer).l_links = ((mer).l_links&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
+#define get_kmer_left_cov(mer, idx) (((mer).l_links.l_links.l_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
+#define set_kmer_left_cov(mer, idx, val) ((mer).l_links.l_links.l_links = (((mer).l_links.l_links.l_links)&(~(EDGE_XOR_MASK<<((idx)*EDGE_BIT_SIZE)))) | (((val)&EDGE_XOR_MASK)<<((idx)*EDGE_BIT_SIZE)) )
#define get_kmer_left_covs(mer) (get_kmer_left_cov(mer, 0) + get_kmer_left_cov(mer, 1) + get_kmer_left_cov(mer, 2) + get_kmer_left_cov(mer, 3))
#define get_kmer_right_cov(mer, idx) (((mer).r_links>>((idx)*EDGE_BIT_SIZE))&EDGE_XOR_MASK)
@@ -52,6 +55,15 @@
#define clear_kmer_entity_del(flags, idx) ((flags)[(idx)>>4] &= ~(0x02u<<(((idx)&0x0f)<<1)))
#define exists_kmer_entity(flags, idx) (!((flags)[(idx)>>4]>>(((idx)&0x0f)<<1)&0x03))
+#define set_kmer_edge_id(mer, idx) (((mer).l_links.edgeId)=(idx))
+#define get_kmer_edge_id(mer) ((mer).l_links.edgeId)
+#define set_kmer_contig_id(mer , idx) (((mer).l_links.contigId) = (idx))
+#define get_kmer_contig_id(mer) ((mer).l_links.contigId)
+#define set_kmer_cov(mer, idx) (((mer).l_links.l_links.covs) = ((idx) & 0xff))
+#define get_kmer_cov(mer) ((mer).l_links.l_links.covs)
+
+
+
#ifdef MER127
typedef __uint128_t u128b;
@@ -62,11 +74,24 @@ typedef struct u256b
} U256b;
#else
#endif
+typedef struct l_link_pack
+{
+ ubyte4 l_links : 4 * EDGE_BIT_SIZE;
+ ubyte4 covs : 8;
+} L_link_pack;
+
+typedef union l_link_union
+{
+ L_link_pack l_links;
+ ubyte4 edgeId;
+ ubyte4 contigId;
+} L_link_union;
typedef struct kmer_st
{
Kmer seq; //kmer set
- ubyte4 l_links; // sever as edgeID since make_edge
+ L_link_union l_links;
+ //ubyte4 l_links; // sever as edgeID since make_edge
ubyte4 r_links: 4 * EDGE_BIT_SIZE;
ubyte4 linear: 1;
ubyte4 deleted: 1;
=====================================
standardPregraph/iterate.c
=====================================
@@ -25,7 +25,9 @@
#include "kmerhash.h"
#include "extfunc.h"
#include "extvab.h"
-
+#if DEBUG
+#define inline
+#endif
static Kmer *kmerBuffer;
static ubyte8 *hashBanBuffer;
@@ -1060,7 +1062,7 @@ void addEdge ( unsigned int from, unsigned int to, char ch, int bal_edge, unsign
if ( num_ed_temp + 1 > num_ed_limit )
{
unsigned int new_num_ed = num_ed_limit * 1.2;
- edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 1 ) * sizeof ( EDGE ), ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+ edge_array = ( EDGE * ) ckrealloc ( edge_array, ( new_num_ed + 3 ) * sizeof ( EDGE ), ( num_ed_limit + 3 ) * sizeof ( EDGE ) );
num_ed_limit = new_num_ed;
int j;
@@ -1888,11 +1890,11 @@ static int cmp_seq ( const void *a, const void *b )
}
}
- if ( i == A->length && i < B->length )
+ if ( ( A->length == 0 ) || ( i == A->length && i < B->length ) )
{
return -1;
}
- else if ( i < A->length && i == B->length )
+ else if ( ( B->length == 0 ) || ( i < A->length && i == B->length ) )
{
return 1;
}
@@ -2731,4 +2733,3 @@ void Iterate ( char *libfile, char *graph, int maxk, int M ) //boolean keepRea
fprintf ( stderr, "Iteration finished.\n" );
fprintf ( stderr, "Time spent on iteration: %dm.\n\n", ( int ) ( stop_t - start_t ) / 60 );
}
-
=====================================
standardPregraph/loadPreGraph.c
=====================================
@@ -457,7 +457,7 @@ void loadEdge ( char *graphfile )
sprintf ( name, "%s.edge.gz", graphfile );
fp = gzopen ( name, "r" );
num_ed_limit = 1.2 * num_ed;
- edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 1 ) * sizeof ( EDGE ) );
+ edge_array = ( EDGE * ) ckalloc ( ( num_ed_limit + 3 ) * sizeof ( EDGE ) );
for ( j = num_ed + 1; j <= num_ed_limit; j++ )
{
=====================================
standardPregraph/map.c
=====================================
@@ -29,7 +29,7 @@
static void initenv ( int argc, char **argv );
static char shortrdsfile[256];
static char graphfile[256];
-
+extern long contigTotalLength;
static void display_map_usage ();
/*************************************************
@@ -159,7 +159,7 @@ void initenv ( int argc, char **argv )
inpseq = outseq = 0;
fprintf ( stderr, "Parameters: map " );
- while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:f" ) ) != EOF )
+ while ( ( copt = getopt ( argc, argv, "s:g:K:p:k:h:f" ) ) != EOF )
{
//printf("get option\n");
switch ( copt )
@@ -194,6 +194,12 @@ void initenv ( int argc, char **argv )
smallKmer = atoi ( temp );
break;
+ case 'h':
+ fprintf ( stderr, "-h %s ", optarg );
+ sscanf ( optarg, "%s", temp );
+ contigTotalLength = atol( temp );
+ break;
+
case 'f':
fill = 1;
fprintf ( stderr, "-f " );
@@ -220,9 +226,10 @@ void initenv ( int argc, char **argv )
static void display_map_usage ()
{
- fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C]\n" );
+ fprintf ( stderr, "\nmap -s configFile -g inputGraph [-f] [-p n_cpu -k kmer_R2C] [-h contig_total_length]\n" );
fprintf ( stderr, " -s <string> configFile: the config file of solexa reads\n" );
fprintf ( stderr, " -g <string> inputGraph: prefix of input graph file names\n" );
+ fprintf ( stderr, " -h (optional) total length of contigs for init hash table. [1024]\n" );
fprintf ( stderr, " -f (optional) output gap related reads in map step for using SRkgf to fill gap, [NO]\n" );
fprintf ( stderr, " -p <int> n_cpu: number of cpu for use, [8]\n" );
#ifdef MER127
=====================================
standardPregraph/newhash.c
=====================================
@@ -74,6 +74,7 @@ Return:
static inline void update_kmer ( kmer_t *mer, ubyte left, ubyte right )
{
ubyte4 cov;
+ ubyte4 cov_all;
if ( left < 4 )
{
@@ -94,6 +95,14 @@ static inline void update_kmer ( kmer_t *mer, ubyte left, ubyte right )
set_kmer_right_cov ( *mer, right, cov + 1 );
}
}
+ if ( left < 4 || right < 4 )
+ {
+ cov_all = get_kmer_cov( *mer );
+ if(cov_all < MAX_KMER_COV_ALL)
+ {
+ set_kmer_cov(*mer,cov_all+1);
+ }
+ }
}
/*************************************************
@@ -113,7 +122,9 @@ Return:
*************************************************/
static inline void set_new_kmer ( kmer_t *mer, Kmer seq, ubyte left, ubyte right )
{
- *mer = empty_kmer;
+ // *mer = empty_kmer;
+ memset(mer,0,sizeof(kmer_t));
+ mer->single=1;
set_kmer_seq ( *mer, seq );
if ( left < 4 )
@@ -125,6 +136,7 @@ static inline void set_new_kmer ( kmer_t *mer, Kmer seq, ubyte left, ubyte right
{
set_kmer_right_cov ( *mer, right, 1 );
}
+ set_kmer_cov(*mer,1);
}
static inline int is_prime_kh ( ubyte8 num )
=====================================
standardPregraph/node2edge.c
=====================================
@@ -513,7 +513,7 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
*/
}
- longNode->l_links = edge_c;
+ set_kmer_edge_id(*longNode ,edge_c);
longNode->twin = ( unsigned char ) ( bal_edge + 1 );
}
else
@@ -536,7 +536,7 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
*/
}
- longNode->l_links = edge_c + bal_edge;
+ set_kmer_edge_id(*longNode, edge_c + bal_edge);
longNode->twin = ( unsigned char ) ( -bal_edge + 1 );
}
}
@@ -570,12 +570,12 @@ static void merge_linearV2 ( char bal_edge, STACK *nStack, int count, gzFile *fp
if ( temp->isSmaller )
{
- del_node->l_links = edge_c;
+ set_kmer_edge_id(*del_node, edge_c);
del_node->twin = ( unsigned char ) ( bal_edge + 1 );
}
else
{
- del_node->l_links = edge_c + bal_edge;
+ set_kmer_edge_id(*del_node, edge_c + bal_edge);
del_node->twin = ( unsigned char ) ( -bal_edge + 1 );
}
}
=====================================
standardPregraph/orderContig.c
=====================================
@@ -17,11 +17,11 @@
#define GapUpperBound 300000
#define MaxCntNode 1000
-
+/*
static int DEBUG = 0;
static int DEBUG1 = 0;
static int DEBUG2 = 0;
-
+*/
static int bySmall = 1;
static boolean static_f = 0;
@@ -5497,6 +5497,7 @@ static void outputLinks ( FILE *fp, int insertS )
unsigned int i, bal_ctg, bal_toCtg;
CONNECT *cnts, *temp_cnt;
+ fprintf ( fp, "id contigID\tgapLen\tweight\tinsertS\n");
for ( i = 1; i <= num_ctg; i++ )
{
cnts = contig_array[i].downwardConnect;
=====================================
standardPregraph/prlHashCtg.c
=====================================
@@ -28,7 +28,7 @@
//debugging variables
static long long *kmerCounter;
-
+extern long contigTotalLength;
//buffer related varibles for chop kmer
static unsigned int read_c;
static char **rcSeq;
@@ -143,7 +143,8 @@ static void singleKmer ( int t, KmerSet *kset, unsigned int seq_index, unsigned
node->twin = 1;
};
- node->l_links = ctgIdArray[seq_index];
+ //node->l_links = ctgIdArray[seq_index];
+ set_kmer_contig_id(*node ,ctgIdArray[seq_index]);
node->r_links = pos;
}
@@ -365,9 +366,16 @@ boolean prlContig2nodes ( char *grapfile, int len_cut )
kmerCounter = ( long long * ) ckalloc ( ( thrd_num + 1 ) * sizeof ( long long ) );
KmerSets = ( KmerSet ** ) ckalloc ( thrd_num * sizeof ( KmerSet * ) );
+
+ long initHashSize = 1024;
+ float loadFactor = 0.77f;
+ if( contigTotalLength != 0 )
+ {
+ initHashSize = contigTotalLength / thrd_num /loadFactor ;
+ }
for ( i = 0; i < thrd_num; i++ )
{
- KmerSets[i] = init_kmerset ( 1024, 0.77f );
+ KmerSets[i] = init_kmerset ( initHashSize, loadFactor );
thrdSignal[i + 1] = 0;
paras[i].threadID = i;
paras[i].mainSignal = &thrdSignal[0];
=====================================
standardPregraph/prlHashReads.c
=====================================
@@ -483,7 +483,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
if ( turn == 1 )
{
turn = 2;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+ if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+ *(readBuffer1+offset1)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start1);
+ exit(-1);
+ }
if ( ( ++i ) % 100000000 == 0 )
{
@@ -532,7 +538,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
if ( turn == 2 )
{
turn = 1;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+ if( !readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+ *(readBuffer2+offset2)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer2+start2);
+ exit(-1);
+ }
if ( ( ++i ) % 100000000 == 0 )
{
@@ -608,7 +620,13 @@ boolean prlRead2HashTable ( char *libfile, char *outfile )
while ( start < offset )
{
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+ if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+ *(readBuffer1+offset)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start);
+ exit(-1);
+ }
if ( ( ++i ) % 100000000 == 0 )
{
@@ -851,9 +869,15 @@ int AIORead ( struct aiocb *mycb, int *offset, char *buf, char *cach, int *rt, i
*rt = aio_read ( mycb );
return 1;
}
+ else if ( 0 == get )
+ {
+ fprintf(stderr, "Warning : aio_return zero, the size of input file must be N * 32768.\n");
+ return 2;
+ }
else
{
- fprintf(stderr, "Error: aio_return error.\n");
+ fprintf(stderr, "Error: aio_return %d, error happened.\n",get);
+ exit(-1);
}
/*else
@@ -956,7 +980,8 @@ static void thread_delow ( KmerSet *set, unsigned char thrdID )
}
}
- if ( rs->l_links == 0 && rs->r_links == 0 )
+ //if ( rs->l_links == 0 && rs->r_links == 0 )
+ if ( get_kmer_left_covs(*rs) ==0 && get_kmer_right_covs(*rs) == 0 )
{
rs->deleted = 1;
tips[thrdID]++;
@@ -1027,14 +1052,16 @@ static void thread_mark ( KmerSet *set, unsigned char thrdID )
if ( rs->single )
{
- kmerFreq[thrdID][1]++;
+ //kmerFreq[thrdID][1]++;
counter++;
}
+ /*
else
{
kmerFreq[thrdID][ ( l_cvg > r_cvg ? l_cvg : r_cvg )]++;
}
-
+ */
+ kmerFreq[thrdID][get_kmer_cov(*rs)] ++ ;
if ( in_num == 1 && out_num == 1 )
{
rs->linear = 1;
=====================================
standardPregraph/prlRead2Ctg.c
=====================================
@@ -298,7 +298,7 @@ static void parse1read ( int t )
continue;
}
- if ( nodeBuffer[s]->l_links == node->l_links )
+ if ( get_kmer_contig_id(*nodeBuffer[s]) == get_kmer_contig_id(*node) )
{
flag++;
nodeBuffer[s] = NULL;
@@ -342,7 +342,7 @@ static void parse1read ( int t )
i = pos - start + 1;
node = nodeBuffer[j];
isSmaller = smallerBuffer[j];
- contigID = node->l_links;
+ contigID = get_kmer_contig_id(*node);
ctgLen = contig_array[contigID].length;
pos = node->r_links;
@@ -407,7 +407,7 @@ static void locate1read ( int t )
i = j - start + 1;
isSmaller = smallerBuffer[j];
- contigID = node->l_links;
+ contigID = get_kmer_contig_id(*node);
ctgLen = contig_array[contigID].length;
pos = node->r_links;
=====================================
standardPregraph/prlRead2path.c
=====================================
@@ -574,22 +574,22 @@ static void search1kmerPlus ( int j, unsigned char thrdID )
if ( smallerBuffer[j] )
{
- mixBuffer[j].low2 = node->l_links;
+ mixBuffer[j].low2 = get_kmer_contig_id(*node);
}
else
{
- mixBuffer[j].low2 = node->l_links + node->twin - 1;
+ mixBuffer[j].low2 = get_kmer_contig_id(*node) + node->twin - 1;
}
#else
if ( smallerBuffer[j] )
{
- mixBuffer[j].low = node->l_links;
+ mixBuffer[j].low = get_kmer_contig_id(*node);;
}
else
{
- mixBuffer[j].low = node->l_links + node->twin - 1;
+ mixBuffer[j].low = get_kmer_contig_id(*node) + node->twin - 1;
}
#endif
@@ -635,11 +635,11 @@ static void parse1read ( int t, int threadID )
{
if ( isSmaller )
{
- edge_index = node->l_links;
+ edge_index = get_kmer_contig_id(*node);
}
else
{
- edge_index = node->l_links + node->twin - 1;
+ edge_index = get_kmer_contig_id(*node) + node->twin - 1;
}
#ifdef MER127
@@ -1001,7 +1001,13 @@ void prlRead2edge ( char *libfile, char *outfile )
if ( turn == 1 )
{
turn = 2;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo );
+ if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start1, offset1, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+ *(readBuffer1+offset1)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start1);
+ exit(-1);
+ }
if ( ( ++i ) % 100000000 == 0 )
{
@@ -1078,7 +1084,13 @@ void prlRead2edge ( char *libfile, char *outfile )
if ( turn == 2 )
{
turn = 1;
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo );
+ if( !readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer2, &start2, offset2, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please make sure input file is correct fastq/fasta file \n");
+ *(readBuffer2+offset2)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer2+start2);
+ exit(-1);
+ }
if ( ( ++i ) % 100000000 == 0 )
{
@@ -1181,7 +1193,14 @@ void prlRead2edge ( char *libfile, char *outfile )
while ( start < offset )
{
- readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo );
+ if( ! readseqInLib ( seqBuffer[read_c], next_name, & ( lenBuffer[read_c] ), readBuffer1, &start, offset, libNo ) )
+ {
+ fprintf ( stderr, "readseqInLib return error! please makesure input file is correct fastq/fasta file \n");
+ *(readBuffer1+offset)='\0';
+ fprintf ( stderr, "invalid data left in buffer:\n%s\n",readBuffer1+start);
+ exit(-1);
+ }
+
if ( ( ++i ) % 100000000 == 0 )
{
=====================================
standardPregraph/read2edge.c
=====================================
@@ -25,6 +25,9 @@
#include "newhash.h"
#include <extfunc.h>
#include <extvab.h>
+#if DEBUG
+#define inline
+#endif
static pthread_mutex_t mutex_arc;
=====================================
standardPregraph/readseq1by1.c
=====================================
@@ -931,6 +931,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
if ( lib_array[i].paired == 1 )
{
readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
if ( lib_array[i].reverse )
{
@@ -945,6 +949,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
{
readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
if ( lib_array[i].reverse )
{
reverse2k ( src_seq, *len_seq );
@@ -961,6 +969,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
if ( lib_array[i].paired == 1 )
{
readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
if ( lib_array[i].reverse )
{
@@ -974,6 +986,10 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
else
{
readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
if ( lib_array[i].reverse )
{
@@ -989,10 +1005,18 @@ boolean readseqInLib ( char *src_seq, char *src_name, int *len_seq, char *buf, i
if ( lib_array[i].curr_type == 6 )
{
readseqfq ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
}
else
{
readseqInBuf ( src_seq, src_name, len_seq, buf, start, offset );
+ if( *len_seq < 1 )
+ {
+ return 0;
+ }
}
/*
View it on GitLab: https://salsa.debian.org/med-team/soapdenovo2/-/commit/2b05218e222c4bc191a008ab8ae3f10820e4c8cc
--
View it on GitLab: https://salsa.debian.org/med-team/soapdenovo2/-/commit/2b05218e222c4bc191a008ab8ae3f10820e4c8cc
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20201027/607d7d6e/attachment-0001.html>
More information about the debian-med-commit
mailing list