[med-svn] [gmap] 01/06: Imported Upstream version 2016-07-11.v4
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Tue Aug 9 09:08:05 UTC 2016
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit 1250ec2ea5db7b724dda8c3d03b04b81500909f2
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Mon Aug 8 10:41:03 2016 +0200
Imported Upstream version 2016-07-11.v4
---
ChangeLog | 34 +++++++++
src/atoiindex.c | 10 +--
src/comp.h | 2 +-
src/inbuffer.c | 10 ++-
src/pair.c | 2 +-
src/pairpool.c | 2 +-
src/sarray-read.c | 5 +-
src/shortread.c | 173 +++++++++++++++++++++++++++++++--------------
src/stage3.c | 5 +-
src/stage3hr.c | 23 ++++--
util/gtf_genes.pl.in | 12 ++--
util/gtf_introns.pl.in | 12 ++--
util/gtf_splicesites.pl.in | 6 +-
13 files changed, 208 insertions(+), 88 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6abfd3f..d5366eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2016-08-02 twu
+
+ * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in, util: Merged
+ revision 195495 from trunk to use preference order of desired keys, rather
+ than the text
+
+ * comp.h, pair.c, pairpool.c, src, stage3.c, stage3hr.c: Merging revisions
+ 195548 through 195550 from trunk to revert recent changes to extraexon
+ comp, addition of dual breaks, and restoring final procedure in
+ Stage3pair_optimal_score
+
+ * sarray-read.c: Limiting suffix array procedure by nmisses_allowed again
+
+ * inbuffer.c: Fixed problem where --force-single-end terminated when file
+ had reads that were a multiple of --input-buffer-size
+
+ * shortread.c: Changed debugging statements to write to stderr
+
+ * atoiindex.c: Merged revision 194848 from trunk to fix calculation of oligo
+ using new block algorithm
+
+ * stage3.c: Merged revision 195487 from trunk to use shortgap comp instead
+ of extraexon comp for representing dual breaks
+
+ * comp.h, pair.c, pairpool.c: Merged revision 195484 from trunk to use
+ shortgap comp instead of extraexon comp for representing dual breaks
+
+ * shortread.c: Merged revision 195483 from trunk to fix issues in reading
+ multiple pairs of files from command line
+
+2016-07-18 twu
+
+ * filestring.c: Implemented right-justified strings
+
2016-07-12 twu
* 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h,
diff --git a/src/atoiindex.c b/src/atoiindex.c
index 17e62cf..793d1f7 100644
--- a/src/atoiindex.c
+++ b/src/atoiindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoiindex.c 184156 2016-02-12 18:30:44Z twu $";
+static char rcsid[] = "$Id: atoiindex.c 195489 2016-08-02 00:16:53Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -691,7 +691,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename,
block_start = oldoffsets[ii];
block_end = oldoffsets[ii+1];
#ifdef WORDS_BIGENDIAN
- reduced = Atoi_reduce_ag(oligoi) & mask;
+ reduced = Atoi_reduce_ag(oligoi + ii) & mask;
offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
if (coord_values_8p == true) {
for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) {
@@ -719,7 +719,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename,
}
#else
if (block_end > block_start) {
- reduced = Atoi_reduce_ag(oligoi) & mask;
+ reduced = Atoi_reduce_ag(oligoi + ii) & mask;
offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
if (coord_values_8p == true) {
for (j = block_start; j < block_end; j++) {
@@ -888,7 +888,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename,
block_start = oldoffsets[ii];
block_end = oldoffsets[ii+1];
#ifdef WORDS_BIGENDIAN
- reduced = Atoi_reduce_tc(oligoi) & mask;
+ reduced = Atoi_reduce_tc(oligoi + ii) & mask;
offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
if (coord_values_8p == true) {
for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) {
@@ -916,7 +916,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename,
}
#else
if (block_end > block_start) {
- reduced = Atoi_reduce_tc(oligoi) & mask;
+ reduced = Atoi_reduce_tc(oligoi + ii) & mask;
offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
if (coord_values_8p == true) {
for (j = block_start; j < block_end; j++) {
diff --git a/src/comp.h b/src/comp.h
index 4c1e2cc..e3aa9ef 100644
--- a/src/comp.h
+++ b/src/comp.h
@@ -1,4 +1,4 @@
-/* $Id: comp.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: comp.h 195553 2016-08-02 17:32:39Z twu $ */
#ifndef COMP_INCLUDED
#define COMP_INCLUDED
diff --git a/src/inbuffer.c b/src/inbuffer.c
index 93ee3cf..064d817 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 179288 2015-11-20 00:45:00Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 195493 2016-08-02 02:01:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -712,6 +712,7 @@ fill_buffer (T this) {
bool skipp;
int nchars1 = 0, nchars2 = 0; /* Returned only because MPI master needs it. Doesn't need to be saved as a field in Inbuffer_T. */
+ /* fprintf(stderr,"Entered fill_buffer\n"); */
while (nread < this->nspaces &&
(queryseq1 = Shortread_read(&this->nextchar,&nchars1,&nchars2,&queryseq2,
&this->input,&this->input2,
@@ -750,6 +751,7 @@ fill_buffer (T this) {
}
this->inputid++;
}
+ /* fprintf(stderr,"Read %d reads\n",nread); */
this->nleft = nread;
this->ptr = 0;
@@ -827,11 +829,13 @@ Inbuffer_get_request (Sequence_T *pairalign_segment, T this)
request = this->buffer[this->ptr++];
this->nleft -= 1;
+#if 0
} else if (this->nextchar == EOF) {
- /* ? Causes stall at end */
- /* Already know it is pointless to fill buffer */
+ /* Causes --force-single-end to fail when reads in a file are a multiple of nspaces */
+ /* Want to call fill_buffer to find out if the input is exhausted */
Outbuffer_add_nread(this->outbuffer,/*nread*/0);
request = NULL;
+#endif
} else {
#ifdef USE_MPI
diff --git a/src/pair.c b/src/pair.c
index 6085751..e0a0d5c 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 193885 2016-07-12 03:21:37Z twu $";
+static char rcsid[] = "$Id: pair.c 195553 2016-08-02 17:32:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/pairpool.c b/src/pairpool.c
index adc6af8..aa61a0a 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 184430 2016-02-17 19:56:26Z twu $";
+static char rcsid[] = "$Id: pairpool.c 195553 2016-08-02 17:32:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
diff --git a/src/sarray-read.c b/src/sarray-read.c
index de751cf..bc01892 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 195552 2016-08-02 17:31:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8201,8 +8201,11 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
if (nmisses_allowed < 0) {
nmisses_allowed = 0;
+#if 0
} else {
+ /* It is possible that this makes GSNAP too slow */
nmisses_allowed = querylength;
+#endif
}
*found_score = querylength;
diff --git a/src/shortread.c b/src/shortread.c
index 0ac4f4b..66d6389 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 193894 2016-07-12 04:09:51Z twu $";
+static char rcsid[] = "$Id: shortread.c 195492 2016-08-02 02:00:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -414,6 +414,7 @@ Shortread_input_init (int *nchars, FILE *fp) {
int c;
bool okayp = false;
+ debugf(fprintf(stderr,"Calling Shortread_input_init on %p\n",fp));
Header[0] = '\0';
while (okayp == false && (c = fgetc(fp)) != EOF) {
@@ -445,6 +446,7 @@ Shortread_input_init_filecontents (char **filecontents) {
int c;
bool okayp = false;
+ debugf(fprintf(stderr,"Calling Shortread_input_init_filecontents on %p\n",filecontents));
Header[0] = '\0';
while (okayp == false && (c = *(*filecontents)++) != EOF && c != '\0') {
@@ -473,6 +475,7 @@ Shortread_input_init_gzip (gzFile fp) {
int c;
bool okayp = false;
+ debugf(fprintf(stderr,"Calling Shortread_input_init_gzip on %p\n",fp));
Header[0] = '\0';
while (okayp == false && (c = gzgetc(fp)) != EOF) {
@@ -500,6 +503,7 @@ Shortread_input_init_bzip2 (Bzip2_T fp) {
int c;
bool okayp = false;
+ debugf(fprintf(stderr,"Calling Shortread_input_init_bzip2 on %p\n",fp));
Header[0] = '\0';
while (okayp == false && (c = bzgetc(fp)) != EOF) {
@@ -2893,10 +2897,13 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
} else {
if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
(*input2 = FOPEN_READ_TEXT((*files)[0])) != NULL) {
- debugf(fprintf(stderr,"Master opening input file 1\n"));
+ debugf(fprintf(stderr,"Master opening input file 2\n"));
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+ fclose(*input2);
+ *input2 = NULL;
+ }
}
if (*input2 != NULL) {
@@ -3073,12 +3080,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
(*nfiles) -= 1;
*nextchar = '\0';
return (T) NULL;
+#if 0
} else {
- debugf(fprintf(stderr,"Slave opening input file 1\n"));
+ debugf(fprintf(stderr,"Slave opening input file 2\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
*nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
+#endif
}
#else
if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
@@ -3087,12 +3096,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
(*nfiles) -= 1;
*nextchar = EOF;
return (T) NULL;
+#if 0
} else {
debugf(fprintf(stderr,"Slave opening input file 2\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
nextchar2 = '\0';
+#endif
}
#endif
@@ -3200,7 +3211,16 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
#endif
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+#ifdef USE_MPI_FILE_INPUT
+ debugf(fprintf(stderr,"Slave closing input 2 using MPI_File_close\n"));
+ MPI_File_close(&(*input2));
+#else
+ debugf(fprintf(stderr,"Slave closing input 2 using fclose\n"));
+ fclose(*input2);
+#endif
+ *input2 = NULL;
+ }
}
if (*filecontents2 != NULL) {
@@ -3329,10 +3349,12 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
queryseq1 = *queryseq2 = (T) NULL;
if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
if (*input1 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 1 using gzclose\n"));
gzclose(*input1);
*input1 = NULL;
}
if (*input2 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 2 using gzclose\n"));
gzclose(*input2);
*input2 = NULL;
}
@@ -3349,6 +3371,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
#ifdef HAVE_ZLIB_GZBUFFER
gzbuffer(*input1,GZBUFFER_SIZE);
#endif
@@ -3361,8 +3384,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
} else {
while (*nfiles > 0 && (*input1 = gzopen((*files)[0],"rb")) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
- (*files)++;
- (*nfiles)--;
+ (*files) += 1;
+ (*nfiles) -= 1;
}
if (*input1 == NULL) {
*nextchar = EOF;
@@ -3371,8 +3394,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
#ifdef HAVE_ZLIB_GZBUFFER
gzbuffer(*input1,GZBUFFER_SIZE);
#endif
- (*files)++;
- (*nfiles)--;
+ (*files) += 1;
+ (*nfiles) -= 1;
*nextchar = '\0';
}
}
@@ -3480,7 +3503,10 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
#endif
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) {
+ gzclose(*input2);
+ *input2 = NULL;
+ }
}
if (*input2 != NULL) {
@@ -3650,10 +3676,12 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
queryseq1 = *queryseq2 = (T) NULL;
if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
if (*input1 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n"));
Bzip2_free(&(*input1));
*input1 = NULL;
}
if (*input2 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n"));
Bzip2_free(&(*input2));
*input2 = NULL;
}
@@ -3670,6 +3698,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
*nextchar = EOF;
return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
*input2 = NULL;
(*files) += 1;
(*nfiles) -= 1;
@@ -3679,15 +3708,15 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
} else {
while (*nfiles > 0 && (*input1 = Bzip2_new((*files)[0])) == NULL) {
fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
- (*files)++;
- (*nfiles)--;
+ (*files) += 1;
+ (*nfiles) -= 1;
}
if (*input1 == NULL) {
*nextchar = EOF;
return (T) NULL;
} else {
- (*files)++;
- (*nfiles)--;
+ (*files) += 1;
+ (*nfiles) -= 1;
*nextchar = '\0';
}
}
@@ -3792,7 +3821,10 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
(*input2 = Bzip2_new((*files)[0])) != NULL) {
(*files) += 1;
(*nfiles) -= 1;
- nextchar2 = '\0';
+ if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) {
+ Bzip2_free(&(*input2));
+ *input2 = NULL;
+ }
}
if (*input2 != NULL) {
@@ -4004,6 +4036,9 @@ Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
(*files) += 2;
(*nfiles) -= 2;
*nextchar = '\0';
+ if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+ return (T) NULL;
+ }
}
}
}
@@ -4221,6 +4256,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
(*files) += 2;
(*nfiles) -= 2;
*nextchar = '\0';
+ if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') {
+ return (T) NULL;
+ }
}
#else
while (*nfiles > 0 &&
@@ -4239,6 +4277,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
(*files) += 2;
(*nfiles) -= 2;
*nextchar = '\0';
+ if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') {
+ return (T) NULL;
+ }
}
#endif
}
@@ -4377,10 +4418,12 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
queryseq1 = *queryseq2 = (T) NULL;
if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
if (*input1 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 1 using gzclose\n"));
gzclose(*input1);
*input1 = NULL;
}
if (*input2 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 2 using gzclose\n"));
gzclose(*input2);
*input2 = NULL;
}
@@ -4391,44 +4434,50 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
} else if (*nfiles == 1 || force_single_end_p == true) {
if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
- fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
- exit(9);
+ fprintf(stderr,"Cannot open gzipped file %s => skipping.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = EOF;
+ return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
#ifdef HAVE_ZLIB_GZBUFFER
gzbuffer(*input1,GZBUFFER_SIZE);
#endif
+ *input2 = NULL;
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
- *input2 = NULL;
- (*files) += 1;
- (*nfiles) -= 1;
- *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
} else {
- if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
- fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
- exit(9);
- } else {
-#ifdef HAVE_ZLIB_GZBUFFER
- gzbuffer(*input1,GZBUFFER_SIZE);
-#endif
+ while (*nfiles > 0 &&
+ ((*input1 = gzopen((*files)[0],"rb")) == NULL ||
+ (*input2 = gzopen((*files)[1],"rb")) == NULL)) {
+ fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+ (*files)[0],(*files)[1]);
+ (*files) += 2;
+ (*nfiles) -= 2;
}
-
- if ((*input2 = gzopen((*files)[1],"rb")) == NULL) {
- fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[1]);
- exit(9);
+ if (*input1 == NULL) {
+ *nextchar = EOF;
+ return (T) NULL;
} else {
+ debugf(fprintf(stderr,"Master opening input files 1 and 2\n"));
#ifdef HAVE_ZLIB_GZBUFFER
+ gzbuffer(*input1,GZBUFFER_SIZE);
gzbuffer(*input2,GZBUFFER_SIZE);
#endif
+ (*files) += 2;
+ (*nfiles) -= 2;
+ *nextchar = '\0';
+ if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) {
+ return (T) NULL;
+ }
}
-
- (*files) += 2;
- (*nfiles) -= 2;
- *nextchar = '\0';
}
}
-
if (*nextchar == '\0') {
if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) {
return (T) NULL;
@@ -4593,10 +4642,12 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
queryseq1 = *queryseq2 = (T) NULL;
if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
if (*input1 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n"));
Bzip2_free(&(*input1));
*input1 = NULL;
}
if (*input2 != NULL) {
+ debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n"));
Bzip2_free(&(*input2));
*input2 = NULL;
}
@@ -4607,28 +4658,40 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
} else if (*nfiles == 1 || force_single_end_p == true) {
if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
- fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
- exit(9);
+ fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]);
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = EOF;
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Master opening input file 1\n"));
+ *input2 = NULL;
+ (*files) += 1;
+ (*nfiles) -= 1;
+ *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
}
- *input2 = NULL;
- (*files) += 1;
- (*nfiles) -= 1;
- *nextchar = '\0'; /* Was nextchar2 = '\0', which is incorrect */
-
+
} else {
- if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
- fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
- exit(9);
+ while (*nfiles > 0 &&
+ ((*input1 = Bzip2_new((*files)[0])) == NULL ||
+ (*input2 = Bzip2_new((*files)[1])) == NULL)) {
+ fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+ (*files)[0],(*files)[1]);
+ (*files) += 2;
+ (*nfiles) -= 2;
}
-
- if ((*input2 = Bzip2_new((*files)[1])) == NULL) {
- fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[1]);
- exit(9);
+ if (*input1 == NULL) {
+ *nextchar = EOF;
+ return (T) NULL;
+ } else {
+ debugf(fprintf(stderr,"Master opening input files 1 and 2\n"));
+ (*files) += 2;
+ (*nfiles) -= 2;
+ *nextchar = '\0';
+ if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) {
+ return (T) NULL;
+ }
}
-
- (*files) += 2;
- (*nfiles) -= 2;
- *nextchar = '\0';
}
}
diff --git a/src/stage3.c b/src/stage3.c
index dabb211..b13c3ab 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage3.c 195553 2016-08-02 17:32:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -8581,7 +8581,8 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
lastpair = (Pair_T) gappairs->first;
firstpair = (Pair_T) List_last_value(gappairs);
debug14(printf("gappairs goes from %d to %d\n",firstpair->querypos,lastpair->querypos));
- if (firstpair->querypos == querydp5 && lastpair->querypos == querydp3) {
+ if (1 || (firstpair->querypos == querydp5 && lastpair->querypos == querydp3)) {
+ /* Note: We have to take this branch, otherwise we get unexpected comp errors */
/* fprintf(stderr,"%d..%d .. %d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */
debug14(printf(" => entire query sequence bridged or not, but taking it regardless\n"));
pairs = Pairpool_transfer(pairs,gappairs);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 0f617cf..006780a 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 195553 2016-08-02 17:32:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -66,6 +66,7 @@ static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $";
#define TERMINAL_COMPUTE_MINLENGTH 40
#define SCORE_INDELS_EVENTRIM 1 /* Needed to compare genomic positions with and without indels */
#define EVENTRIM_BADINTRON_PENALTY 2
+#define DO_FINAL 1
#define OUTERLENGTH_SLOP 100
@@ -16488,7 +16489,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
int n;
int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3;
int best_nsegments, best_nsegments_5, best_nsegments_3;
- /* int max_nmatches = 0; */
+ int max_nmatches = 0;
#ifdef USE_OPTIMAL_SCORE_BINGO
int minscore_bingo = querylength5 + querylength3;
#endif
@@ -16842,8 +16843,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
}
} else {
- /* Final: Previously based purely on nmatches. However, this leads to indelbreaks and bad introns. */
-#if 0
+#ifdef DO_FINAL
+ /* Final: based on nmatches. ? leads to indelbreaks and bad introns. But skipping this leads to nearly-identical alignments */
max_nmatches = 0;
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
@@ -16858,6 +16859,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
max_nmatches = hitpair->nmatches;
}
}
+
+ cutoff_level = max_nmatches - subopt_levels;
+ debug6(printf("cutoff level %d = max_nmatches %d - subopt %d\n",cutoff_level,max_nmatches,subopt_levels));
+
#else
for (p = hitpairlist; p != NULL; p = p->rest) {
hitpair = (Stage3pair_T) p->first;
@@ -16901,6 +16906,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
hitpair,hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score));
optimal = List_push(optimal,hitpair);
+#ifdef DO_FINAL
+ } else if (hitpair->nmatches < cutoff_level) {
+ debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches %d < cutoff_level %d (finalp %d)\n",
+ hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
+ hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
+ hitpair->nmatches,cutoff_level,finalp));
+ *eliminatedp = true;
+ Stage3pair_free(&hitpair);
+#else
} else if (hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim > cutoff_level) {
debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with scores %d+%d > cutoff_level %d (finalp %d)\n",
hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
@@ -16908,6 +16922,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level,finalp));
*eliminatedp = true;
Stage3pair_free(&hitpair);
+#endif
} else {
debug6(printf("Final: Keeping hit pair %p with scores %d+%d (vs cutoff_level %d)\n",
diff --git a/util/gtf_genes.pl.in b/util/gtf_genes.pl.in
index 44e88b9..3ec4f0b 100644
--- a/util/gtf_genes.pl.in
+++ b/util/gtf_genes.pl.in
@@ -94,9 +94,9 @@ sub get_info {
my $info = shift @_;
my @desired_keys = @_;
- foreach $item (@ {$info}) {
- ($key,$value) = $item =~ /(\S+) (.+)/;
- foreach $desired_key (@desired_keys) {
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
if ($key eq $desired_key) {
return $value;
}
@@ -112,9 +112,9 @@ sub get_info_optional {
my $info = shift @_;
my @desired_keys = @_;
- foreach $item (@ {$info}) {
- ($key,$value) = $item =~ /(\S+) (.+)/;
- foreach $desired_key (@desired_keys) {
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
if ($key eq $desired_key) {
return $value;
}
diff --git a/util/gtf_introns.pl.in b/util/gtf_introns.pl.in
index ae3c364..232dc40 100755
--- a/util/gtf_introns.pl.in
+++ b/util/gtf_introns.pl.in
@@ -177,9 +177,9 @@ sub get_info {
my $info = shift @_;
my @desired_keys = @_;
- foreach $item (@ {$info}) {
- ($key,$value) = $item =~ /(\S+) (.+)/;
- foreach $desired_key (@desired_keys) {
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
if ($key eq $desired_key) {
return $value;
}
@@ -195,9 +195,9 @@ sub get_info_optional {
my $info = shift @_;
my @desired_keys = @_;
- foreach $item (@ {$info}) {
- ($key,$value) = $item =~ /(\S+) (.+)/;
- foreach $desired_key (@desired_keys) {
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
if ($key eq $desired_key) {
return $value;
}
diff --git a/util/gtf_splicesites.pl.in b/util/gtf_splicesites.pl.in
index e018f0e..d296a9c 100755
--- a/util/gtf_splicesites.pl.in
+++ b/util/gtf_splicesites.pl.in
@@ -177,9 +177,9 @@ sub get_info {
my $info = shift @_;
my @desired_keys = @_;
- foreach $item (@ {$info}) {
- ($key,$value) = $item =~ /(\S+) (.+)/;
- foreach $desired_key (@desired_keys) {
+ foreach $desired_key (@desired_keys) {
+ foreach $item (@ {$info}) {
+ ($key,$value) = $item =~ /(\S+) (.+)/;
if ($key eq $desired_key) {
return $value;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list