[med-svn] [subread] 02/03: Imported Upstream version 1.5.0-p3+dfsg
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Fri Jun 3 13:07:59 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository subread.
commit 5c4e244dd213e7f263430735647072af434781be
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Thu Jun 2 14:29:20 2016 +0200
Imported Upstream version 1.5.0-p3+dfsg
---
doc/SubreadUsersGuide.tex | 6 +-
src/HelperFunctions.c | 91 +++++++++++++
src/HelperFunctions.h | 2 +
src/Makefile | 1 -
src/SNPCalling.c | 63 ++++++---
src/core-indel.c | 5 +-
src/core.c | 57 ++++----
src/input-files.c | 338 ++++++++++++++++++++++++++++++++++++++++------
src/input-files.h | 4 +
src/makefile.version | 2 +-
src/propmapped.c | 1 +
src/readSummary.c | 7 +-
src/test-fisher.c | 84 ++++++++++++
13 files changed, 567 insertions(+), 94 deletions(-)
diff --git a/doc/SubreadUsersGuide.tex b/doc/SubreadUsersGuide.tex
index 48ee9a6..0bb5644 100644
--- a/doc/SubreadUsersGuide.tex
+++ b/doc/SubreadUsersGuide.tex
@@ -35,9 +35,9 @@
\begin{center}
{\Huge\bf Subread/Rsubread Users Guide}\\
\vspace{1 cm}
-{\centering\large Subread v1.5.0-p2/Rsubread v1.20.5\\}
+{\centering\large Subread v1.5.0-p3/Rsubread v1.22.1\\}
\vspace{1 cm}
-\centering 11 April 2016\\
+\centering 27 May 2016\\
\vspace{5 cm}
\Large Wei Shi and Yang Liao\\
\vspace{1 cm}
@@ -764,7 +764,7 @@ Note that, when counting at the meta-feature level, reads that overlap multiple
\subsection{In-built annotations}
-In-built gene annotations for genomes \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
+In-built gene annotations for genomes \emph{hg38}, \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
These annotations were downloaded from NCBI RefSeq database and then adapted by merging overlapping exons from the same gene to form a set of disjoint exons for each gene.
Genes with the same Entrez gene identifiers were also merged into one gene.
diff --git a/src/HelperFunctions.c b/src/HelperFunctions.c
index 39b9dd4..c976dc0 100644
--- a/src/HelperFunctions.c
+++ b/src/HelperFunctions.c
@@ -20,6 +20,7 @@
#include <ctype.h>
#include <string.h>
#include <assert.h>
+#include <math.h>
#ifdef MACOS
@@ -844,6 +845,8 @@ int mac_str(char * str_buff)
}
}
+ close(sock);
+
unsigned char mac_address[6];
if (success){
@@ -886,5 +889,93 @@ int mac_or_rand_str(char * str_buff){
return mac_str(str_buff) && rand_str(str_buff) && mathrand_str(str_buff);
}
+#define PI_LONG 3.1415926535897932384626434L
+
+long double fast_fractorial(unsigned int x, long double * buff, int buflen){
+ if(x<2) return 0;
+
+ if(buff != NULL && x < buflen && buff[x]!=0){
+ return buff[x];
+ }
+ long double ret;
+
+ if(x<50){
+ int x1;
+ ret =0.L;
+ for(x1 = 2 ; x1 <= x; x1++) ret += logl((long double)(x1));
+ }else{
+ ret = logl(x)*1.0L*x - 1.0L*x + 0.5L * logl(2.0L*PI_LONG* x) + 1.L/(12.L*x) - 1.L/(360.L* x*x*x) + 1.L/(1260.L* x*x*x*x*x) - 1.L/(1680.L*x*x*x*x*x*x*x);
+ }
+ if(buff != NULL && x < buflen) buff[x]=ret;
+ return ret;
+}
+
+
+#define BUFF_4D 36
+
+long double fast_freq( unsigned int tab[2][2] , long double * buff, int buflen){
+ int x0 = -1;
+ if(buff && buflen > BUFF_4D * BUFF_4D * BUFF_4D * BUFF_4D && tab[0][0] < BUFF_4D && tab[0][1] < BUFF_4D && tab[1][0] < BUFF_4D && tab[1][1] < BUFF_4D ){
+ x0 = buflen + tab[0][0]* BUFF_4D * BUFF_4D * BUFF_4D + tab[0][1] * BUFF_4D * BUFF_4D + tab[1][0] * BUFF_4D + tab[1][1];
+ if(buff[x0]!=0) return buff[x0];
+
+ }
+ long double ret = fast_fractorial(tab[0][0]+tab[0][1],buff,buflen)+fast_fractorial(tab[1][0]+tab[1][1],buff,buflen) +
+ fast_fractorial(tab[0][0]+tab[1][0],buff,buflen)+fast_fractorial(tab[0][1]+tab[1][1],buff,buflen) -
+ fast_fractorial(tab[0][0],buff,buflen) - fast_fractorial(tab[0][1],buff,buflen) -
+ fast_fractorial(tab[1][0],buff,buflen) - fast_fractorial(tab[1][1],buff,buflen) -
+ fast_fractorial(tab[0][0] + tab[0][1] + tab[1][0] + tab[1][1],buff,buflen);
+ if(x0>=0) buff[x0] = ret;
+ return ret;
+}
+
+
+
+double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * buff, int buflen){
+ unsigned int tab[2][2];
+ long double P0, P_delta, ret;
+
+ tab[0][0]=a;
+ tab[0][1]=b;
+ tab[1][0]=c;
+ tab[1][1]=d;
+
+ int x_a, y_a, x_min=-1, y_min=-1;
+ unsigned int min_a = 0xffffffff;
+ for(x_a = 0; x_a < 2; x_a++)
+ for(y_a = 0; y_a < 2; y_a++){
+ if(tab[x_a][y_a]<min_a){
+ min_a = tab[x_a][y_a];
+ x_min = x_a;
+ y_min = y_a;
+ }
+ }
+ P_delta = fast_freq(tab, buff, buflen);
+ P0 = ret = exp(P_delta);
+ //printf("P0=%LG\n", P0);
+ if(min_a>0){
+ unsigned int Qa = min_a;
+ unsigned int Qb = tab[x_min][!y_min];
+ unsigned int Qc = tab[!x_min][y_min];
+ unsigned int Qd = tab[!x_min][!y_min];
+ for(; ; ){
+ min_a --;
+ Qb++;Qc++;
+ P_delta -= logl(Qb*Qc);
+ P_delta += logl(Qa*Qd);
+ Qa--;Qd--;
+ ret += expl(P_delta);
+ // printf("%LG %LG %LG\n", ret, 1 - (ret - P0), expl(P_delta));
+ if(min_a < 1) break;
+ }
+ }
+
+ double ret1 = ret, ret2 = 1 - (ret - P0);
+ if(min(ret1, ret2) < 0){
+ return 0.0;
+ }
+ return min(ret1, ret2) ;
+
+}
diff --git a/src/HelperFunctions.h b/src/HelperFunctions.h
index 2f40d21..65beb1e 100644
--- a/src/HelperFunctions.h
+++ b/src/HelperFunctions.h
@@ -69,4 +69,6 @@ int strcmp_number(char * s1, char * s2);
unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar);
int mac_or_rand_str(char * char_14);
+
+double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * frac_buffer, int buffer_size);
#endif
diff --git a/src/Makefile b/src/Makefile
index 9215779..41e8d9f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,7 +6,6 @@ all:
@echo " For building subread in Linux, please run ' make -f Makefile.Linux '."
@echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '."
@echo " For building subread in FreeBSD, please run ' gmake -f Makefile.FreeBSD '."
- @echo " For building subread in Oracle Solaris or OpenSolaris, please run ' gmake -f Makefile.SunOS '."
@echo
@echo " The default compiler is gcc; you may change it by editing the Makefiles for platforms."
@echo
diff --git a/src/SNPCalling.c b/src/SNPCalling.c
index fbd8b03..e34184f 100644
--- a/src/SNPCalling.c
+++ b/src/SNPCalling.c
@@ -99,7 +99,7 @@ struct SNP_Calling_Parameters{
#define PRECALCULATE_FACTORIAL 2000000
-double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
+long double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
double factorial_float_real(int a)
{
@@ -113,7 +113,6 @@ double factorial_float_real(int a)
double factorial_float(int a)
{
-
if(a<PRECALCULATE_FACTORIAL && (precalculated_factorial[a]!=0.))
return precalculated_factorial[a];
else
@@ -154,25 +153,43 @@ double fisher_exact_test(int a, int b, int c, int d)
//printf("FET: %d %d %d %d\n", a, b, c, d);
- if (a * d > b * c) {
- a = a + b; b = a - b; a = a - b;
- c = c + d; d = c - d; c = c - d;
- }
- if (a > d) { a = a + d; d = a - d; a = a - d; }
- if (b > c) { b = b + c; c = b - c; b = b - c; }
+ if(1){
+ double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL);
+ //SUBREADprintf("FISHER_RES %d %d %d %d %.9f %.9f\n", a,b,c,d, ret, log(ret));
+ return ret;
+ }else{
+ int AA=a, BB=b, CC=c, DD=d;
+ if (a * d > b * c) {
+ a = a + b; b = a - b; a = a - b;
+ c = c + d; d = c - d; c = c - d;
+ }
- double p_sum = 0.0;
+ if (a > d) { a = a + d; d = a - d; a = a - d; }
+ if (b > c) { b = b + c; c = b - c; b = b - c; }
- double p = fisherSub(a, b, c, d);
- while (a >= 0) {
- p_sum += p;
- if (a == 0) break;
- --a; ++b; ++c; --d;
- p = fisherSub(a, b, c, d);
- }
+ double p_sum = 0.0;
+
+ double p = fisherSub(a, b, c, d);
+ while (a >= 0) {
+ p_sum += p;
+ if (a == 0) break;
+ --a; ++b; ++c; --d;
+ p = fisherSub(a, b, c, d);
+ }
- return p_sum;
+ if(1){
+ // DON'T PUT IT HERE!
+ double r1 = fast_fisher_test_one_side(AA,BB,CC,DD, NULL, PRECALCULATE_FACTORIAL);
+ if(abs(r1-p_sum) / max(r1,p_sum) >= 0.01){
+ printf("BADR: FAST = %.7f != JAVA %.7f, %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
+ }else{
+ printf("GOODR: FAST = %.7f ~= JAVA %.7f, %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
+ }
+ }
+
+ return p_sum;
+ }
}
unsigned int fisher_test_size;
@@ -508,7 +525,8 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
}
float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
- //SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
+ if(0 && flanking_matched > 10000 && p_middle < 1E-5)
+ SUBREADprintf("TEST: %u a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
if(all_result_needed || ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16))
snp_fisher_raw [i] = p_middle;
else snp_fisher_raw [i] = -999;
@@ -1296,6 +1314,7 @@ void EXSNP_SIGINT_hook(int param)
unlink(del_name);
}
}
+ closedir(d);
}
}
@@ -1328,8 +1347,8 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
fisher_test_size = 0;
- precalculated_factorial = (double*)malloc(sizeof(double)*PRECALCULATE_FACTORIAL);
- for(i=0; i<PRECALCULATE_FACTORIAL; i++)
+ precalculated_factorial = (long double*)malloc(sizeof(long double)*PRECALCULATE_FACTORIAL * 2);
+ for(i=0; i<PRECALCULATE_FACTORIAL * 2; i++)
precalculated_factorial[i] = 0.;
@@ -1493,7 +1512,7 @@ void print_usage_snp(char * myname)
SUBREADputs(" location must have (ie. the minimum coverage). 1 by default.");
SUBREADputs("");
SUBREADputs(" -x <int> Specify the maximum number of mapped reads a SNP-containing");
- SUBREADputs(" location have have. 3000 by default. Any location having more than");
+ SUBREADputs(" location have have. 1000 by default. Any location having more than");
SUBREADputs(" the threshold number of reads will not be considered for SNP");
SUBREADputs(" calling. This option is useful for removing PCR artefacts.");
SUBREADputs("");
@@ -1567,7 +1586,7 @@ int main_snp_calling_test(int argc,char ** argv)
parameters.supporting_read_rate = 0.;
parameters.min_supporting_read_number = 1;
parameters.min_alternative_read_number = 1;
- parameters.max_supporting_read_number = 3000;
+ parameters.max_supporting_read_number = 1000;
parameters.neighbour_filter_testlen = -1;
parameters.neighbour_filter_rate = 0.000000001;
parameters.min_phred_score = 13;
diff --git a/src/core-indel.c b/src/core-indel.c
index 9a0fe5a..31d4bfe 100644
--- a/src/core-indel.c
+++ b/src/core-indel.c
@@ -1752,7 +1752,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
{
unsigned int head_indel_left_edge = head_indel_pos + current_result->selected_position - 1;
//head_indel_left_edge -= max(0, head_indel_movement);
- if(head_indel_left_edge>=0 && abs(head_indel_movement)<=global_context -> config.max_indel_length)
+ if(abs(head_indel_movement)<=global_context -> config.max_indel_length)
{
local_add_indel_event(global_context, thread_context, event_table, read_text + head_indel_pos, head_indel_left_edge, head_indel_movement, 1, 1, 0);
mark_gapped_read(current_result);
@@ -2126,7 +2126,7 @@ int finalise_db_graph(global_context_t * global_context, reassembly_block_contex
else
{
int test_next;
- int all_reads;
+ int all_reads = 0;
int ignored_moves=0;
int max_move_reads=0;
unsigned long long int previous_key = current_key<<2;
@@ -3886,6 +3886,7 @@ void COREMAIN_SIGINT_hook(int param)
unlink(del_name);
}
}
+ closedir(d);
}
}
diff --git a/src/core.c b/src/core.c
index 7423c0e..3e47262 100644
--- a/src/core.c
+++ b/src/core.c
@@ -1221,34 +1221,37 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
{
chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
- int xk1;
- r->chimeric_sections = chimeric_sections;
- strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
- for(xk1=1; xk1<chimeric_sections; xk1++)
- {
- int chimeric_pos;
- char * chimaric_chr;
- strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
- unsigned int vitual_head_pos = r->out_poses[xk1];
- char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
+ if(chimeric_sections > 0){
+ int xk1;
+ r->chimeric_sections = chimeric_sections;
+ strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
+ for(xk1=1; xk1<chimeric_sections; xk1++)
+ {
+ int chimeric_pos;
+ char * chimaric_chr;
+ strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
+ unsigned int vitual_head_pos = r->out_poses[xk1];
+ char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
- //if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
+ //if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
- if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
- {
- int soft_clipping_movement = 0;
- soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
- assert(chimaric_chr);
- sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
+ if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
+ {
+ int soft_clipping_movement = 0;
+ soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
+ assert(chimaric_chr);
+ sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
+ }
+ else is_r_OK = 0;
}
- else is_r_OK = 0;
- }
- r->linear_position = r->out_poses[0];
- r->strand = r->out_strands[0]=='-';
+ r->linear_position = r->out_poses[0];
+ r->strand = r->out_strands[0]=='-';
- strcpy(r->cigar , r->out_cigars[0]);
+ strcpy(r->cigar , r->out_cigars[0]);
+ }else is_r_OK = 0;
}
- r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
+ if(is_r_OK)
+ r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
}
if(is_r_OK)
@@ -4189,6 +4192,14 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
int curr_offset, new_offset;
locate_gene_position_max(current_perfect_cursor, &global_context -> chromosome_table, & curr_chr, & curr_offset, NULL, NULL, 1);
locate_gene_position_max(jummped_location , &global_context -> chromosome_table, & new_chr, & new_offset, NULL, NULL, 1);
+ if( curr_chr == NULL || new_chr == NULL ){
+ /*
+ char outpos[100];
+ absoffset_to_posstr(global_context, sel_pos + 1, outpos);
+ SUBREADprintf("Wrong CIGAR: mapped to %s, CIGAR=%s\n", outpos , in_cigar);
+ */
+ return -1;
+ }
assert(curr_chr);
assert(new_chr);
is_chro_jump = (curr_chr != new_chr);
diff --git a/src/input-files.c b/src/input-files.c
index 9b704e3..7e6f8f4 100644
--- a/src/input-files.c
+++ b/src/input-files.c
@@ -2181,6 +2181,7 @@ void delete_with_prefix(char * prefix){
//test fix
}
}
+ closedir(d);
}
}
@@ -2345,6 +2346,19 @@ void SAM_pairer_set_unsorted_notification(SAM_pairer_context_t * pairer, void (*
pairer -> unsorted_notification = unsorted_notification;
}
+
+int SAM_pairer_warning_file_open_limit(){
+
+ struct rlimit limit_st;
+ getrlimit(RLIMIT_NOFILE, & limit_st);
+
+ if(min(limit_st.rlim_cur, limit_st.rlim_max ) < MIN_FILE_POINTERS_ALLOWED){
+ SUBREADprintf(" ERROR: the maximum file open number (%d) is too low. Please increase this number to a number larger than 50 by using the 'ulimit -n' command. This program has to terminate now.\n\n",(int)(min(limit_st.rlim_cur, limit_st.rlim_max)));
+ return 1;
+ }
+ return 0;
+}
+
// Tiny_Mode only write the following information:
// Name Flag Chro Pos Mapq Cigar MateChro MatePos Tlen N I NH:i:xx HI:i:xx
// Tiny_Mode does not work when output and input are both in BAM format
@@ -2357,6 +2371,8 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu
pairer -> input_fp = f_subr_open(in_file, "rb");
if(NULL == pairer -> input_fp) return 1;
+ SAM_pairer_warning_file_open_limit(pairer);
+
pairer -> input_is_BAM = BAM_input;
pairer -> tiny_mode = is_Tiny_Mode;
pairer -> reset_output_function = reset_output_function;
@@ -2696,10 +2712,10 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
BAM_next_u32(bam_signature);
BAM_next_u32(pairer -> BAM_l_text);
char * header_txt = NULL;
+ int header_txt_dynamic_length = -1;
- if(pairer->BAM_l_text>0) header_txt = malloc(pairer->BAM_l_text);
+ if(pairer->BAM_l_text>0) header_txt = malloc(max(1000000,pairer->BAM_l_text));
- //SUBREADprintf("HEAD_TXT=%d\n", pairer -> BAM_l_text);
for(x1 = 0 ; x1 < pairer -> BAM_l_text; x1++){
BAM_next_nch;
header_txt [x1] = nch;
@@ -2707,13 +2723,23 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
pairer -> output_header(pairer, thread_context -> thread_id, 1, pairer -> BAM_l_text , header_txt , pairer -> BAM_l_text );
BAM_next_u32(pairer -> BAM_n_ref);
- //SUBREADprintf("HEAD_CHRO=%d\n", pairer -> BAM_n_ref);
unsigned int ref_bin_len = 0;
for(x1 = 0; x1 < pairer -> BAM_n_ref; x1++) {
unsigned int l_name, l_ref, x2;
char ref_name[MAX_CHROMOSOME_NAME_LEN];
BAM_next_u32(l_name);
assert(l_name < 256);
+
+ if(header_txt == NULL){
+ header_txt = malloc(3000000);
+ header_txt_dynamic_length = 3000000;
+ }
+
+ if( header_txt_dynamic_length>0 && ref_bin_len > header_txt_dynamic_length - 1000000 ){
+ header_txt_dynamic_length *= 2;
+ header_txt = realloc( header_txt, header_txt_dynamic_length);
+ }
+
memcpy(header_txt + ref_bin_len, &l_name, 4);
ref_bin_len += 4;
for(x2 = 0; x2 < l_name; x2++){
@@ -2725,10 +2751,7 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
memcpy(header_txt + ref_bin_len, &l_ref, 4);
ref_bin_len += 4;
- if(ref_bin_len >= pairer -> BAM_l_text){
- SUBREADprintf("%d-th ref : %s [len=%u], bin_len=%d < %d\n", x1, ref_name, l_ref, ref_bin_len, pairer -> BAM_l_text);
- assert(ref_bin_len < pairer -> BAM_l_text);
- }
+ //SUBREADprintf("%d-th ref : %s [len=%u], bin_len=%d < %d\n", x1, ref_name, l_ref, ref_bin_len, pairer -> BAM_l_text);
}
//exit(0);
@@ -3327,6 +3350,7 @@ void SAM_pairer_reset( SAM_pairer_context_t * pairer ) {
pairer -> BAM_header_parsed = 0;
pairer -> total_input_reads = 0;
pairer -> input_chunk_no = 0;
+ pairer -> merge_level_finished = 0;
for(x1 = 0; x1 < pairer -> total_threads ; x1 ++){
pairer -> threads[x1].reads_in_SBAM = 0;
pairer -> threads[x1].input_buff_BIN_used = 0;
@@ -3452,8 +3476,6 @@ int SAM_pairer_do_next_read( SAM_pairer_context_t * pairer , SAM_pairer_thread_t
int has_next_read = SAM_pairer_get_next_read_BIN(pairer, thread_context, &bin, &bin_len);
if(has_next_read){
int name_len = SAM_pairer_get_read_full_name(pairer, thread_context, bin, bin_len, read_full_name, & this_flags);
- if(0 && FIXLENstrcmp("V0112_0155:7:1206:5677:116578", read_full_name) == 0)
- SUBREADprintf("FNNM:%s, FLAG=%d, LASTFN=%s\n", read_full_name , this_flags, thread_context -> immediate_last_read_full_name);
if(pairer -> is_single_end_mode == 0 && ( this_flags & 1 ) == 1){ // if the reads are PE
@@ -3569,7 +3591,7 @@ int SAM_pairer_osr_next_name(FILE * fp , char * name, int thread_no, int all_thr
int rlen2 = fread(name, 1, rlen, fp);
if(rlen2 != rlen) return 0;
name[rlen]=0;
- if( SAM_pairer_osr_hash(name)% all_threads == thread_no )
+ if(all_threads < 0 || SAM_pairer_osr_hash(name)% all_threads == thread_no )
{
fseek(fp, -2-rlen, SEEK_CUR);
return 1;
@@ -3607,42 +3629,256 @@ int SAM_pairer_is_matched_chunks(char * c1, char * c2){
return i2==i1;
}
-void * SAM_pairer_rescure_orphants(void * params){
- void ** param_ptr = (void **) params;
- SAM_pairer_context_t * pairer = param_ptr[0];
- int thread_no = (int)(param_ptr[1]-NULL);
- free(params);
- int orphant_fp_size = 50, orphant_fp_no=0;
- FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
- int thno, bkno, x1;
+
+
+
+void merge_level_fps(SAM_pairer_context_t * pairer, char * fname, FILE ** fps, int fps_no){
char * bin_tmp1 , * bin_tmp2;
+ int max_name_len = MAX_READ_NAME_LEN*2 +80, x1;
- if(0 == thread_no && pairer -> display_progress)
- SUBREADprintf("Finished scanning the input file. Processing unpaired reads.\n");
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+ sprintf(tmp_fname, "%s-MERGE-TMP.tmp", pairer->tmp_file_prefix);
+
+ char * names = malloc( fps_no * max_name_len );
bin_tmp1 = malloc(66000);
bin_tmp2 = malloc(66000);
+ FILE * out_fp = fopen(tmp_fname, "wb");
+
+
+ // initialize the "current_first_name" for each orphan file
+
+ for(x1 = 0 ; x1 < fps_no; x1++)
+ {
+ int has = SAM_pairer_osr_next_name( fps[x1] , names + max_name_len*x1 , -1 , -1);
+ if(!has) *(names + max_name_len*x1)=0;
+ }
+
+
+ while(1){
+ int min_name_fileno = -1;
+ int min2_name_fileno = -1;
+
+ // find the min_name in all FPs
+ // and find the same min_name if there is any
+
+ for(x1 = 0 ; x1 < fps_no; x1++){
+ int has = *(names + max_name_len*x1);
+ if(has){
+ int strcv_12 = 1;
+ if(min_name_fileno >=0) strcv_12 = strcmp(names+(min_name_fileno * max_name_len), names+(x1 * max_name_len));
+ if(strcv_12 > 0){
+ min_name_fileno = x1;
+ min2_name_fileno = -1;
+ }else if( strcv_12 == 0){
+ min2_name_fileno = x1;
+ }
+ }
+
+ }
+
+
+ if(min_name_fileno >= 0){
+ SAM_pairer_osr_next_bin( fps[ min_name_fileno ] , bin_tmp1);
+
+ if(min2_name_fileno>=0){
+ SAM_pairer_osr_next_bin( fps[ min2_name_fileno ] , bin_tmp2);
+ pairer -> output_function(pairer, 0, names + max_name_len*min_name_fileno , (char*) bin_tmp1, (char*)bin_tmp2);
+
+ if(0 == pairer -> is_unsorted_notified){
+ char * name_tmp_1 = malloc(strlen(names+(min_name_fileno * max_name_len))+5), *name_tmp_2 = malloc(strlen(names+(min_name_fileno * max_name_len))+5);
+ char * min1_chunk_info, * min2_chunk_info;
+ sprintf(name_tmp_1, "C:%s:%d", names+(min_name_fileno * max_name_len), 0);
+ sprintf(name_tmp_2, "C:%s:%d", names+(min2_name_fileno * max_name_len), 1);
+ min1_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_1);
+ min2_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_2);
+ if(min1_chunk_info == NULL || min2_chunk_info == NULL || !SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)){
+ sprintf(name_tmp_1, "B:%s:%d", names+(min_name_fileno * max_name_len), 0);
+ if( pairer -> unsorted_notification ){
+ //SUBREADprintf("FINAL STEP\n");
+ pairer -> unsorted_notification(pairer , HashTableGet( pairer -> unsorted_notification_table , name_tmp_1), NULL);
+ }
+ pairer -> is_unsorted_notified = 1;
+ }
+ }
+
+ int read_has = SAM_pairer_osr_next_name( fps[min2_name_fileno], names + max_name_len*min2_name_fileno, -1, -1);
+ if(!read_has) *(names + max_name_len*min2_name_fileno)=0;
+ }else{
+ unsigned short wlen;
+ unsigned int rbinlen = 0;
+ wlen = strlen( names+(min_name_fileno * max_name_len) );
+ fwrite( &wlen, 2, 1,out_fp );
+ fwrite( names+(min_name_fileno * max_name_len), 1, wlen, out_fp );
+ memcpy( &rbinlen, bin_tmp1 , 2);
+ rbinlen += 4;
+ fwrite( bin_tmp1, 2, 1, out_fp );
+ fwrite( bin_tmp1, 1, rbinlen, out_fp );
+ }
+ int read_has = SAM_pairer_osr_next_name( fps[min_name_fileno], names + max_name_len*min_name_fileno, -1, -1);
+ if(!read_has) *(names + max_name_len*min_name_fileno)=0;
+ } else break;
+ }
+ fclose(out_fp);
+ unlink(fname);
+ rename(tmp_fname, fname);
+ free(names);
+
+}
+#define PAIRER_WAIT_TICK_TIME 10000
+
+int SAM_pairer_get_merge_max_fp(SAM_pairer_context_t * pairer){
+ return pairer -> max_file_open_number;
+
+}
+
+void SAM_pairer_set_merge_max_fp(SAM_pairer_context_t * pairer, int fon){
+ pairer -> max_file_open_number = fon;
+}
+
+
+void SAM_pairer_probe_maxfp( SAM_pairer_context_t * pairer){
+ int orphant_fp_no=0;
+ int thno, bkno, x1;
+ int thread_fps [ pairer -> total_threads ];
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+
+ memset(thread_fps, 0, sizeof(int) * pairer -> total_threads);
for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
for( bkno = 0 ; ; bkno++){
- char tmp_fname[MAX_FILE_NAME_LENGTH];
sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thno, bkno);
-
FILE * in_fp = fopen(tmp_fname, "rb");
if(NULL == in_fp) break;
- if(orphant_fp_no >= orphant_fp_size){
- orphant_fp_size *= 1.5;
+ thread_fps[thno] = bkno;
+ fclose(in_fp);
+ orphant_fp_no ++;
+ }
+ }
+
+ int max_open_fps = 0, has_limit = 0;
+ int orphant_fp_size = 50;
+ FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
+
+ for( bkno = 0 ; bkno < 5; bkno++){
+ sprintf(tmp_fname, "%s-FTEST-%d.tmp", pairer->tmp_file_prefix, bkno);
+ FILE * tfp = fopen(tmp_fname, "w");
+ if(NULL == tfp){
+ has_limit = 1;
+ break;
+ }
+ orphant_fps[max_open_fps++] = tfp;
+ }
+ //#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+ for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+ if(has_limit) break;
+ for( bkno = 0 ; ; bkno++){
+ sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thno, bkno);
+ FILE * in_fp = fopen(tmp_fname, "rb");
+ if(NULL == in_fp){
+ if( bkno <= thread_fps[thno] ) has_limit = 1;
+ break;
+ }
+ orphant_fps[max_open_fps++] = in_fp;
+ if(max_open_fps >= orphant_fp_size - 1){
+ orphant_fp_size *= 2;
orphant_fps = realloc(orphant_fps, orphant_fp_size * sizeof(FILE *));
}
- orphant_fps[orphant_fp_no++]=in_fp;
}
}
- int max_name_len = MAX_READ_NAME_LEN*2 +80;
+ for( bkno = 0 ;bkno < max_open_fps; bkno ++) fclose(orphant_fps[bkno]);
+
+ SAM_pairer_set_merge_max_fp(pairer, max_open_fps - 5);
+
+ //#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+ //SUBREADprintf("Needed FPS = %d, Ulimit FPS = %d, Has_Limit = %d \n", orphant_fp_no, max_open_fps, has_limit);
+
+ if( SAM_pairer_get_merge_max_fp(pairer) < orphant_fp_no * pairer -> total_threads){
+ int processed_orphant = 0;
+ int current_opened_fp_no = 0 ;
+ FILE * level_merge_fps [ SAM_pairer_get_merge_max_fp(pairer) ];
+ for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+ for( bkno = 0 ; ; bkno++){
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+ sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thno, bkno);
+
+ FILE * in_fp = fopen(tmp_fname, "rb");
+ if(NULL == in_fp) break;
+
+ // #warning ">>>> COMMENT DEBUG OUTPUT <<<<"
+ // SUBREADprintf("Adding temp file:%s\n", tmp_fname);
+ level_merge_fps[current_opened_fp_no ++] = in_fp;
+ processed_orphant ++;
+ if(current_opened_fp_no >= SAM_pairer_get_merge_max_fp(pairer) || processed_orphant == orphant_fp_no){
+ sprintf(tmp_fname, "%s-LEVELMERGE.tmp", pairer->tmp_file_prefix);
+
+ // #warning ">>>> COMMENT DEBUG OUTPUT <<<<"
+ // SUBREADprintf("Merging temp files\n");
+ merge_level_fps(pairer , tmp_fname, level_merge_fps, current_opened_fp_no);
+ for(x1 = 0; x1 < current_opened_fp_no; x1++) fclose(level_merge_fps[x1]);
+
+ if(processed_orphant < orphant_fp_no){
+ level_merge_fps[0] = fopen(tmp_fname, "rb");
+ current_opened_fp_no = 1;
+ }
+ }
+ }
+ }
+ pairer -> merge_level_finished = 1;
+ }
+ free(orphant_fps);
+
+}
+
+void * SAM_pairer_rescure_orphants_max_FP(void * params){
+ void ** param_ptr = (void **) params;
+ SAM_pairer_context_t * pairer = param_ptr[0];
+ int thread_no = (int)(param_ptr[1]-NULL);
+ free(params);
+
+ unsigned long long died=0;
+ int orphant_fp_no=0;
+ int thno, bkno, x1;
+ char tmp_fname[MAX_FILE_NAME_LENGTH];
+
+ int max_name_len = MAX_READ_NAME_LEN*2 +80, orphant_fp_size = 50;
+ FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
+
+ if(0 == thread_no && pairer -> display_progress)
+ SUBREADprintf("Finished scanning the input file. Processing unpaired reads.\n");
+
+ //SUBREADprintf("merged = %d\n", pairer -> merge_level_finished);
+ if(pairer -> merge_level_finished){
+ sprintf(tmp_fname, "%s-LEVELMERGE.tmp", pairer->tmp_file_prefix);
+ FILE * in_fp = fopen(tmp_fname, "rb");
+ orphant_fps[0] = in_fp;
+ orphant_fp_no=1;
+ }else{
+ orphant_fp_no = 0;
+ for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+ for( bkno = 0 ; ; bkno++){
+ sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix, thno, bkno);
+
+ FILE * in_fp = fopen(tmp_fname, "rb");
+ if(NULL == in_fp) break;
+ if(orphant_fp_no >= orphant_fp_size){
+ orphant_fp_size *= 1.5;
+ orphant_fps = realloc(orphant_fps, orphant_fp_size * sizeof(FILE *));
+ }
+ orphant_fps[orphant_fp_no++]=in_fp;
+ }
+ }
+ }
+
char * names = malloc( orphant_fp_no * max_name_len );
memset(names, 0, orphant_fp_no * max_name_len );
+ char * bin_tmp1 , * bin_tmp2;
+ bin_tmp1 = malloc(66000);
+ bin_tmp2 = malloc(66000);
+
for(x1 = 0 ; x1 < orphant_fp_no; x1++)
{
@@ -3650,7 +3886,6 @@ void * SAM_pairer_rescure_orphants(void * params){
if(!has) *(names + max_name_len*x1)=0;
}
- unsigned long long rescured=0, died=0;
while(1){
int min_name_fileno = -1;
@@ -3685,8 +3920,10 @@ void * SAM_pairer_rescure_orphants(void * params){
sprintf(name_tmp_2, "C:%s:%d", names+(min2_name_fileno * max_name_len), 1);
min1_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_1);
min2_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_2);
+ //#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
//SUBREADprintf("RESCURE MATCHER: %s , %s == %s , %s, %s\n", name_tmp_1, name_tmp_2, min1_chunk_info, min2_chunk_info,
// SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)?"MATCH":"XXXXX");
+
if(min1_chunk_info == NULL || min2_chunk_info == NULL || !SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)){
sprintf(name_tmp_1, "B:%s:%d", names+(min_name_fileno * max_name_len), 0);
if( pairer -> unsorted_notification ){
@@ -3699,19 +3936,26 @@ void * SAM_pairer_rescure_orphants(void * params){
int read_has = SAM_pairer_osr_next_name( orphant_fps[min2_name_fileno], names + max_name_len*min2_name_fileno, thread_no, pairer-> total_threads);
if(!read_has) *(names + max_name_len*min2_name_fileno)=0;
- rescured++;
}else{
+ //#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
//SUBREADprintf("FINAL_ORPHAN:%s\n" , names + max_name_len*min_name_fileno);
pairer -> output_function(pairer, thread_no, names + max_name_len*min_name_fileno, (char*) bin_tmp1, NULL);
died++;
}
int read_has = SAM_pairer_osr_next_name( orphant_fps[min_name_fileno], names + max_name_len*min_name_fileno, thread_no, pairer-> total_threads);
+ //#warning ">>>>>>> COMMENT NEXT BLOCK <<<<<<<<"
+ if(0){
+ if(!read_has) SUBREADprintf("FP %d FINISHED\n", min_name_fileno);
+ }
if(!read_has) *(names + max_name_len*min_name_fileno)=0;
} else break;
}
free(names);
+ //#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+ //SUBREADprintf("finished_fps= %d\n", orphant_fp_no);
+
for(x1 = 0 ; x1 < orphant_fp_no; x1++)
{
fclose ( orphant_fps[x1] );
@@ -3719,10 +3963,10 @@ void * SAM_pairer_rescure_orphants(void * params){
free( bin_tmp1 );
free( bin_tmp2 );
pairer -> total_orphan_reads += died;
- //SUBREADprintf("RESCURE THREAD %d Rescured %llu, Died %llu\n", thread_no, rescured, died);
return NULL;
}
+
void SAM_pairer_update_orphant_table(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context){
unsigned int x2 = 0;
unsigned char ** name_list, ** bin_list;
@@ -3815,7 +4059,17 @@ int is_read_bin(char * bin, int bin_len, int max_refID){
int cigar_op = cigar_v & 0xf;
int cigar_value = cigar_v & 0xfffffff;
if(cigar_op > 8) return -12;
- if((cigar_op == 0 || cigar_op == 1 || cigar_op > 4) && (cigar_value < 1 || cigar_value > MAX_BIN_RECORD_LENGTH)) return -13;
+
+ if((cigar_op == 0 || cigar_op == 1 || cigar_op > 6) && (cigar_value < 1 || cigar_value > MAX_BIN_RECORD_LENGTH)){
+
+ //#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<"
+ if(0){
+ char * rname = bin + 36;
+ SUBREADprintf("OP=%d, VAL=%d [%s]\n", cigar_op, cigar_value, rname);
+ }
+
+ return -13;
+ }
}
int ext_cursor = 36 + name_len + 4*cigar_opts + l_seq + (l_seq+1)/2;
@@ -3857,7 +4111,6 @@ int SAM_pairer_find_start(SAM_pairer_context_t * pairer , SAM_pairer_thread_t *
}
}
-#define PAIRER_WAIT_TICK_TIME 10000
void * SAM_pairer_thread_run( void * params ){
void ** param_ptr = (void **) params;
@@ -3931,6 +4184,9 @@ int SAM_pairer_run_once( SAM_pairer_context_t * pairer){
}
if(0 == pairer -> is_bad_format){
+
+ SAM_pairer_probe_maxfp( pairer );
+
for(x1 = 0; x1 < pairer -> total_threads ; x1++){
// this 16-byte memory block is freed in the thread worker.
@@ -3938,7 +4194,7 @@ int SAM_pairer_run_once( SAM_pairer_context_t * pairer){
init_params[0] = pairer;
init_params[1] = (void *)(NULL+x1);
- pthread_create(&(pairer -> threads[x1].thread_stab), NULL, SAM_pairer_rescure_orphants, init_params);
+ pthread_create(&(pairer -> threads[x1].thread_stab), NULL, SAM_pairer_rescure_orphants_max_FP, init_params);
}
for(x1 = 0; x1 < pairer -> total_threads ; x1++){
@@ -4189,7 +4445,11 @@ void SAM_pairer_fix_format(SAM_pairer_context_t * pairer){
block_size += (nch << (8 * x1));
}
- //SUBREADprintf("Bsize=%d\n", block_size);
+ //#warning ">>>>>> COMMENT NEXT BLOCK <<<<<<"
+ if(0){
+ if(block_size > 65000)
+ SUBREADprintf("Bsize=%d\n", block_size);
+ }
if(block_size > 60000 && !pairer -> tiny_mode){
pairer -> is_bad_format = 1;
SUBREADprintf("ERROR: the read record length (%d) is longer than the limit. The program has to terminate. \n", block_size);
@@ -4454,7 +4714,7 @@ void SAM_nosort_run_once(SAM_pairer_context_t * pairer){
unsigned int bam_signature;
NOSORT_BAM_next_u32(bam_signature);
NOSORT_BAM_next_u32(pairer -> BAM_l_text);
- char * header_txt = malloc(pairer->BAM_l_text);
+ char * header_txt = malloc(max(1000000,pairer->BAM_l_text));
for(x1 = 0 ; x1 < pairer -> BAM_l_text; x1++){
NOSORT_BAM_next_nch;
@@ -4669,12 +4929,14 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){
if(pairer -> force_do_not_sort){
SAM_nosort_run_once(pairer);
- }else for(corrected_run = 0; corrected_run < 2 ; corrected_run ++){
+
+ }else for(corrected_run = 0; corrected_run < 2 ; corrected_run ++){
SAM_pairer_run_once(pairer);
if(pairer -> is_bad_format && pairer->input_is_BAM){
- assert(0 == corrected_run);
- if(pairer -> display_progress)
- SUBREADprintf("Retrying with the corrected format...\n");
+ //#warning ">>>>>> REMOVE '+ 1' FROM NEXT LINE IN RELEASE <<<<<<"
+ assert(1 != corrected_run);
+ //#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<"
+ //SUBREADprintf("Retrying with the corrected format...\n");
delete_with_prefix(pairer -> tmp_file_prefix);
SAM_pairer_fix_format(pairer);
if(pairer -> is_bad_format)
diff --git a/src/input-files.h b/src/input-files.h
index f98ff3b..d7a96c3 100644
--- a/src/input-files.h
+++ b/src/input-files.h
@@ -36,6 +36,7 @@
#define GENE_INPUT_SAM_PAIR_2 95
+#define MIN_FILE_POINTERS_ALLOWED 50
#define FILE_TYPE_SAM 50
#define FILE_TYPE_BAM 500
@@ -120,6 +121,8 @@ typedef struct {
int is_single_end_mode;
int force_do_not_sort;
int is_finished;
+ int merge_level_finished;
+ int max_file_open_number;
subread_lock_t input_fp_lock;
subread_lock_t output_header_lock;
subread_lock_t unsorted_notification_lock;
@@ -303,4 +306,5 @@ int SAM_pairer_multi_thread_header (void * pairer_vp, int thread_no, int is_text
int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_threads, int has_dummy , int BAM_output, int BAM_compression_level, char * out_file);
void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ;
int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value);
+int SAM_pairer_warning_file_open_limit();
#endif
diff --git a/src/makefile.version b/src/makefile.version
index 177d75d..3266789 100644
--- a/src/makefile.version
+++ b/src/makefile.version
@@ -1,4 +1,4 @@
-SUBREAD_VERSION_BASE=1.5.0-p2
+SUBREAD_VERSION_BASE=1.5.0-p3
SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y")
SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)"
SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)"
diff --git a/src/propmapped.c b/src/propmapped.c
index f5d16c6..064dae7 100644
--- a/src/propmapped.c
+++ b/src/propmapped.c
@@ -109,6 +109,7 @@ void PROPMAPPED_SIGINT_hook(int param)
unlink(del_name);
}
}
+ closedir(d);
}
}
diff --git a/src/readSummary.c b/src/readSummary.c
index a1bbc6d..8457f9c 100644
--- a/src/readSummary.c
+++ b/src/readSummary.c
@@ -936,14 +936,14 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
ret_features[xk1].chro_name_pos_delta = chro_name_pos - ret_features[xk1].feature_name_pos;
ret_features[xk1].start = atoi( start_ptr );// start
- if(ret_features[xk1].start<0 || ret_features[xk1].start>0x7fffffff)
+ if(ret_features[xk1].start>0x7fffffff)
{
ret_features[xk1].start = 0;
print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
}
ret_features[xk1].end = atoi( end_ptr );//end
- if(ret_features[xk1].end<0 || ret_features[xk1].end>0x7fffffff)
+ if(ret_features[xk1].end>0x7fffffff)
{
ret_features[xk1].end = 0;
print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
@@ -4145,8 +4145,7 @@ int readSummary(int argc,char *argv[]){
else isRestrictlyNoOvelrapping = 0;
-
-
+ if(SAM_pairer_warning_file_open_limit()) return -1;
fc_thread_global_context_t global_context;
diff --git a/src/test-fisher.c b/src/test-fisher.c
new file mode 100644
index 0000000..610b7fe
--- /dev/null
+++ b/src/test-fisher.c
@@ -0,0 +1,84 @@
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include "subread.h"
+#include "HelperFunctions.h"
+
+
+
+double factorial_float(int a)
+{
+
+ double ret = 0;
+ while(a)
+ ret += log(a--);
+ return ret;
+}
+
+
+
+double fisherSub(int a, int b, int c, int d)
+{
+ double ret = factorial_float(a+b) + factorial_float(c+d) + factorial_float(a+c) + factorial_float(b+d) ;
+ ret -= factorial_float(a) + factorial_float(b) + factorial_float(c) + factorial_float(d) + factorial_float(a+b+c+d);
+ return pow(2.71828183, ret);
+}
+
+
+
+
+/**
+ * See HELP string or run with no arguments for usage.
+ * <p>
+ * The code used to calculate a Fisher p-value comes originally from a
+ * <a href="http://infofarm.affrc.go.jp/~kadasowa/fishertest.htm">JavaScript program</a>
+ * by T. Kadosawa (kadosawa at niaes.affrc.go.jp).
+ * Retrieved from http://www.users.zetnet.co.uk/hopwood/tools/StatTests.java on 3/Jul/2012
+ *
+ * @author David Hopwood
+ * @date 2000/04/23
+ */
+
+double fisher_exact_test(int a, int b, int c, int d)
+{
+
+ if (a * d > b * c) {
+ a = a + b; b = a - b; a = a - b;
+ c = c + d; d = c - d; c = c - d;
+ }
+
+ if (a > d) { a = a + d; d = a - d; a = a - d; }
+ if (b > c) { b = b + c; c = b - c; b = b - c; }
+
+ double p_sum = 0.0;
+
+ double p = fisherSub(a, b, c, d);
+ while (a >= 0) {
+ p_sum += p;
+ if (a == 0) break;
+ --a; ++b; ++c; --d;
+ p = fisherSub(a, b, c, d);
+ }
+
+ return p_sum;
+}
+
+
+long double fastfact(int x){
+ return logl(x)*x - x + 0.5 * logl(2*M_PI* x) + 1/(12.*x) - 1./(360.* x*x*x) + 1./(1260.* x*x*x*x*x) - 1./(1680.*x*x*x*x*x*x*x);// + (x>60?0:(1./(1188.*x*x*x*x*x*x*x*x*x ) ));
+}
+
+main(){
+ unsigned int a = 10 , c = 11,
+ b = 11 , d = 5000;
+
+ double fisher, fisher_old;
+ fisher = fast_fisher_test_one_side(a,b,c,d, NULL, 0);
+ fisher_old = fisher_exact_test(a,b,c,d);
+ printf("Log fisher = %.7f ; Old fisher = %.7f\n", log(fisher),log(fisher_old));
+
+ long double x1 = 1E-19L + 1E-20L;
+ long double x2 = 1L - expl(logl(0.5L) + logl(2.0L));
+ printf("New Vals: x1=%LG, x2=%LG\n", x1,x2);
+}
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/subread.git
More information about the debian-med-commit
mailing list