[med-svn] [gmap] 01/06: Imported Upstream version 2016-07-11.v4

Alex Mestiashvili malex-guest at moszumanska.debian.org
Tue Aug 9 09:08:05 UTC 2016


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository gmap.

commit 1250ec2ea5db7b724dda8c3d03b04b81500909f2
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Mon Aug 8 10:41:03 2016 +0200

    Imported Upstream version 2016-07-11.v4
---
 ChangeLog                  |  34 +++++++++
 src/atoiindex.c            |  10 +--
 src/comp.h                 |   2 +-
 src/inbuffer.c             |  10 ++-
 src/pair.c                 |   2 +-
 src/pairpool.c             |   2 +-
 src/sarray-read.c          |   5 +-
 src/shortread.c            | 173 +++++++++++++++++++++++++++++++--------------
 src/stage3.c               |   5 +-
 src/stage3hr.c             |  23 ++++--
 util/gtf_genes.pl.in       |  12 ++--
 util/gtf_introns.pl.in     |  12 ++--
 util/gtf_splicesites.pl.in |   6 +-
 13 files changed, 208 insertions(+), 88 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 6abfd3f..d5366eb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2016-08-02  twu
+
+    * gtf_genes.pl.in, gtf_introns.pl.in, gtf_splicesites.pl.in, util: Merged
+      revision 195495 from trunk to use preference order of desired keys, rather
+      than the text
+
+    * comp.h, pair.c, pairpool.c, src, stage3.c, stage3hr.c: Merging revisions
+      195548 through 195550 from trunk to revert recent changes to extraexon
+      comp, addition of dual breaks, and restoring final procedure in
+      Stage3pair_optimal_score
+
+    * sarray-read.c: Limiting suffix array procedure by nmisses_allowed again
+
+    * inbuffer.c: Fixed problem where --force-single-end terminated when file
+      had reads that were a multiple of --input-buffer-size
+
+    * shortread.c: Changed debugging statements to write to stderr
+
+    * atoiindex.c: Merged revision 194848 from trunk to fix calculation of oligo
+      using new block algorithm
+
+    * stage3.c: Merged revision 195487 from trunk to use shortgap comp instead
+      of extraexon comp for representing dual breaks
+
+    * comp.h, pair.c, pairpool.c: Merged revision 195484 from trunk to use
+      shortgap comp instead of extraexon comp for representing dual breaks
+
+    * shortread.c: Merged revision 195483 from trunk to fix issues in reading
+      multiple pairs of files from command line
+
+2016-07-18  twu
+
+    * filestring.c: Implemented right-justified strings
+
 2016-07-12  twu
 
     * 2016-07-01-better-triage, archive.html, doublelist.c, doublelist.h,
diff --git a/src/atoiindex.c b/src/atoiindex.c
index 17e62cf..793d1f7 100644
--- a/src/atoiindex.c
+++ b/src/atoiindex.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: atoiindex.c 184156 2016-02-12 18:30:44Z twu $";
+static char rcsid[] = "$Id: atoiindex.c 195489 2016-08-02 00:16:53Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -691,7 +691,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename,
       block_start = oldoffsets[ii];
       block_end = oldoffsets[ii+1];
 #ifdef WORDS_BIGENDIAN
-      reduced = Atoi_reduce_ag(oligoi) & mask;
+      reduced = Atoi_reduce_ag(oligoi + ii) & mask;
       offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
       if (coord_values_8p == true) {
 	for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) {
@@ -719,7 +719,7 @@ compute_ag (char *new_pointers_filename, char *new_offsets_filename,
       }
 #else
       if (block_end > block_start) {
-	reduced = Atoi_reduce_ag(oligoi) & mask;
+	reduced = Atoi_reduce_ag(oligoi + ii) & mask;
 	offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
 	if (coord_values_8p == true) {
 	  for (j = block_start; j < block_end; j++) {
@@ -888,7 +888,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename,
       block_start = oldoffsets[ii];
       block_end = oldoffsets[ii+1];
 #ifdef WORDS_BIGENDIAN
-      reduced = Atoi_reduce_tc(oligoi) & mask;
+      reduced = Atoi_reduce_tc(oligoi + ii) & mask;
       offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
       if (coord_values_8p == true) {
 	for (j = Bigendian_convert_uint(block_start); j < Bigendian_convert_uint(block_end); j++) {
@@ -916,7 +916,7 @@ compute_tc (char *new_pointers_filename, char *new_offsets_filename,
       }
 #else
       if (block_end > block_start) {
-	reduced = Atoi_reduce_tc(oligoi) & mask;
+	reduced = Atoi_reduce_tc(oligoi + ii) & mask;
 	offset = Bitpack64_read_one(reduced,newoffsetsmeta,newoffsetsstrm) + Bitpack64_access(reduced,countermeta,counterstrm);
 	if (coord_values_8p == true) {
 	  for (j = block_start; j < block_end; j++) {
diff --git a/src/comp.h b/src/comp.h
index 4c1e2cc..e3aa9ef 100644
--- a/src/comp.h
+++ b/src/comp.h
@@ -1,4 +1,4 @@
-/* $Id: comp.h 40271 2011-05-28 02:29:18Z twu $ */
+/* $Id: comp.h 195553 2016-08-02 17:32:39Z twu $ */
 #ifndef COMP_INCLUDED
 #define COMP_INCLUDED
 
diff --git a/src/inbuffer.c b/src/inbuffer.c
index 93ee3cf..064d817 100644
--- a/src/inbuffer.c
+++ b/src/inbuffer.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: inbuffer.c 179288 2015-11-20 00:45:00Z twu $";
+static char rcsid[] = "$Id: inbuffer.c 195493 2016-08-02 02:01:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -712,6 +712,7 @@ fill_buffer (T this) {
   bool skipp;
   int nchars1 = 0, nchars2 = 0;		/* Returned only because MPI master needs it.  Doesn't need to be saved as a field in Inbuffer_T. */
 
+  /* fprintf(stderr,"Entered fill_buffer\n"); */
   while (nread < this->nspaces &&
 	 (queryseq1 = Shortread_read(&this->nextchar,&nchars1,&nchars2,&queryseq2,
 				     &this->input,&this->input2,
@@ -750,6 +751,7 @@ fill_buffer (T this) {
     }
     this->inputid++;
   }
+  /* fprintf(stderr,"Read %d reads\n",nread); */
 
   this->nleft = nread;
   this->ptr = 0;
@@ -827,11 +829,13 @@ Inbuffer_get_request (Sequence_T *pairalign_segment, T this)
     request = this->buffer[this->ptr++];
     this->nleft -= 1;
 
+#if 0
   } else if (this->nextchar == EOF) {
-    /* ? Causes stall at end */
-    /* Already know it is pointless to fill buffer */
+    /* Causes --force-single-end to fail when reads in a file are a multiple of nspaces */
+    /* Want to call fill_buffer to find out if the input is exhausted */
     Outbuffer_add_nread(this->outbuffer,/*nread*/0);
     request = NULL;
+#endif
 
   } else {
 #ifdef USE_MPI
diff --git a/src/pair.c b/src/pair.c
index 6085751..e0a0d5c 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 193885 2016-07-12 03:21:37Z twu $";
+static char rcsid[] = "$Id: pair.c 195553 2016-08-02 17:32:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/pairpool.c b/src/pairpool.c
index adc6af8..aa61a0a 100644
--- a/src/pairpool.c
+++ b/src/pairpool.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pairpool.c 184430 2016-02-17 19:56:26Z twu $";
+static char rcsid[] = "$Id: pairpool.c 195553 2016-08-02 17:32:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
diff --git a/src/sarray-read.c b/src/sarray-read.c
index de751cf..bc01892 100644
--- a/src/sarray-read.c
+++ b/src/sarray-read.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: sarray-read.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: sarray-read.c 195552 2016-08-02 17:31:06Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -8201,8 +8201,11 @@ Sarray_search_greedy (int *found_score, char *queryuc_ptr, char *queryrc, int qu
 	       querylength,sarray_fwd->indexsize,nmisses_allowed,genestrand));
   if (nmisses_allowed < 0) {
     nmisses_allowed = 0;
+#if 0
   } else {
+    /* It is possible that this makes GSNAP too slow */
     nmisses_allowed = querylength;
+#endif
   }
   *found_score = querylength;
 
diff --git a/src/shortread.c b/src/shortread.c
index 0ac4f4b..66d6389 100644
--- a/src/shortread.c
+++ b/src/shortread.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: shortread.c 193894 2016-07-12 04:09:51Z twu $";
+static char rcsid[] = "$Id: shortread.c 195492 2016-08-02 02:00:54Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -414,6 +414,7 @@ Shortread_input_init (int *nchars, FILE *fp) {
   int c;
   bool okayp = false;
 
+  debugf(fprintf(stderr,"Calling Shortread_input_init on %p\n",fp));
   Header[0] = '\0';
 
   while (okayp == false && (c = fgetc(fp)) != EOF) {
@@ -445,6 +446,7 @@ Shortread_input_init_filecontents (char **filecontents) {
   int c;
   bool okayp = false;
 
+  debugf(fprintf(stderr,"Calling Shortread_input_init_filecontents on %p\n",filecontents));
   Header[0] = '\0';
 
   while (okayp == false && (c = *(*filecontents)++) != EOF && c != '\0') {
@@ -473,6 +475,7 @@ Shortread_input_init_gzip (gzFile fp) {
   int c;
   bool okayp = false;
 
+  debugf(fprintf(stderr,"Calling Shortread_input_init_gzip on %p\n",fp));
   Header[0] = '\0';
 
   while (okayp == false && (c = gzgetc(fp)) != EOF) {
@@ -500,6 +503,7 @@ Shortread_input_init_bzip2 (Bzip2_T fp) {
   int c;
   bool okayp = false;
 
+  debugf(fprintf(stderr,"Calling Shortread_input_init_bzip2 on %p\n",fp));
   Header[0] = '\0';
 
   while (okayp == false && (c = bzgetc(fp)) != EOF) {
@@ -2893,10 +2897,13 @@ Shortread_read_fasta_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
       } else {
 	if (*input2 == NULL && *nfiles > 0 && force_single_end_p == false &&
 	    (*input2 = FOPEN_READ_TEXT((*files)[0])) != NULL) {
-	  debugf(fprintf(stderr,"Master opening input file 1\n"));
+	  debugf(fprintf(stderr,"Master opening input file 2\n"));
 	  (*files) += 1;
 	  (*nfiles) -= 1;
-	  nextchar2 = '\0';
+	  if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+	    fclose(*input2);
+	    *input2 = NULL;
+	  }
 	}
 
 	if (*input2 != NULL) {
@@ -3073,12 +3080,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
 	  (*nfiles) -= 1;
 	  *nextchar = '\0';
 	  return (T) NULL;
+#if 0
 	} else {
-	  debugf(fprintf(stderr,"Slave opening input file 1\n"));
+	  debugf(fprintf(stderr,"Slave opening input file 2\n"));
 	  *input2 = NULL;
 	  (*files) += 1;
 	  (*nfiles) -= 1;
 	  *nextchar = '\0';	/* Was nextchar2 = '\0', which is incorrect */
+#endif
 	}
 #else
 	if ((*input1 = FOPEN_READ_TEXT((*files)[0])) == NULL) {
@@ -3087,12 +3096,14 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
 	  (*nfiles) -= 1;
 	  *nextchar = EOF;
 	  return (T) NULL;
+#if 0
 	} else {
 	  debugf(fprintf(stderr,"Slave opening input file 2\n"));
 	  *input2 = NULL;
 	  (*files) += 1;
 	  (*nfiles) -= 1;
 	  nextchar2 = '\0';
+#endif
 	}
 #endif
 
@@ -3200,7 +3211,16 @@ read_fasta_filecontents (int *nextchar, T *queryseq2,
 #endif
 	  (*files) += 1;
 	  (*nfiles) -= 1;
-	  nextchar2 = '\0';
+	  if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+#ifdef USE_MPI_FILE_INPUT
+	    debugf(fprintf(stderr,"Slave closing input 2 using MPI_File_close\n"));
+	    MPI_File_close(&(*input2));
+#else
+	    debugf(fprintf(stderr,"Slave closing input 2 using fclose\n"));
+	    fclose(*input2);
+#endif
+	    *input2 = NULL;
+	  }
 	}
 
 	if (*filecontents2 != NULL) {
@@ -3329,10 +3349,12 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
     queryseq1 = *queryseq2 = (T) NULL;
     if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
       if (*input1 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 1 using gzclose\n"));
 	gzclose(*input1);
 	*input1 = NULL;
       }
       if (*input2 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 2 using gzclose\n"));
 	gzclose(*input2);
 	*input2 = NULL;
       }
@@ -3349,6 +3371,7 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
 	  *nextchar = EOF;
 	  return (T) NULL;
 	} else {
+	  debugf(fprintf(stderr,"Master opening input file 1\n"));
 #ifdef HAVE_ZLIB_GZBUFFER
 	  gzbuffer(*input1,GZBUFFER_SIZE);
 #endif
@@ -3361,8 +3384,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
       } else {
 	while (*nfiles > 0 && (*input1 = gzopen((*files)[0],"rb")) == NULL) {
 	  fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
-	  (*files)++;
-	  (*nfiles)--;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
 	}
 	if (*input1 == NULL) {
 	  *nextchar = EOF;
@@ -3371,8 +3394,8 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
 #ifdef HAVE_ZLIB_GZBUFFER
 	  gzbuffer(*input1,GZBUFFER_SIZE);
 #endif
-	  (*files)++;
-	  (*nfiles)--;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
 	  *nextchar = '\0';
 	}
       }
@@ -3480,7 +3503,10 @@ Shortread_read_fasta_gzip (int *nextchar, T *queryseq2,
 #endif
 	  (*files) += 1;
 	  (*nfiles) -= 1;
-	  nextchar2 = '\0';
+	  if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) {
+	    gzclose(*input2);
+	    *input2 = NULL;
+	  }
 	}
 
 	if (*input2 != NULL) {
@@ -3650,10 +3676,12 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
     queryseq1 = *queryseq2 = (T) NULL;
     if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
       if (*input1 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n"));
 	Bzip2_free(&(*input1));
 	*input1 = NULL;
       }
       if (*input2 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n"));
 	Bzip2_free(&(*input2));
 	*input2 = NULL;
       }
@@ -3670,6 +3698,7 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
 	  *nextchar = EOF;
 	  return (T) NULL;
 	} else {
+	  debugf(fprintf(stderr,"Master opening input file 1\n"));
 	  *input2 = NULL;
 	  (*files) += 1;
 	  (*nfiles) -= 1;
@@ -3679,15 +3708,15 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
       } else {
 	while (*nfiles > 0 && (*input1 = Bzip2_new((*files)[0])) == NULL) {
 	  fprintf(stderr,"Can't open file %s => skipping it.\n",(*files)[0]);
-	  (*files)++;
-	  (*nfiles)--;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
 	}
 	if (*input1 == NULL) {
 	  *nextchar = EOF;
 	  return (T) NULL;
 	} else {
-	  (*files)++;
-	  (*nfiles)--;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
 	  *nextchar = '\0';
 	}
       }
@@ -3792,7 +3821,10 @@ Shortread_read_fasta_bzip2 (int *nextchar, T *queryseq2,
 	    (*input2 = Bzip2_new((*files)[0])) != NULL) {
 	  (*files) += 1;
 	  (*nfiles) -= 1;
-	  nextchar2 = '\0';
+	  if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) {
+	    Bzip2_free(&(*input2));
+	    *input2 = NULL;
+	  }
 	}
 
 	if (*input2 != NULL) {
@@ -4004,6 +4036,9 @@ Shortread_read_fastq_text (int *nextchar, int *nchars1, int *nchars2, T *queryse
 	  (*files) += 2;
 	  (*nfiles) -= 2;
 	  *nextchar = '\0';
+	  if ((nextchar2 = Shortread_input_init(&(*nchars2),*input2)) == EOF) {
+	    return (T) NULL;
+	  }
 	}
       }
     }
@@ -4221,6 +4256,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
 	  (*files) += 2;
 	  (*nfiles) -= 2;
 	  *nextchar = '\0';
+	  if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') {
+	    return (T) NULL;
+	  }
 	}
 #else
 	while (*nfiles > 0 &&
@@ -4239,6 +4277,9 @@ read_fastq_filecontents (int *nextchar, T *queryseq2,
 	  (*files) += 2;
 	  (*nfiles) -= 2;
 	  *nextchar = '\0';
+	  if ((nextchar2 = Shortread_input_init_filecontents(&(*filecontents2))) == '\0') {
+	    return (T) NULL;
+	  }
 	}
 #endif
       }
@@ -4377,10 +4418,12 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
     queryseq1 = *queryseq2 = (T) NULL;
     if (*input1 == NULL || *nextchar == EOF) { /* was gzeof(*input1) */
       if (*input1 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 1 using gzclose\n"));
 	gzclose(*input1);
 	*input1 = NULL;
       }
       if (*input2 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 2 using gzclose\n"));
 	gzclose(*input2);
 	*input2 = NULL;
       }
@@ -4391,44 +4434,50 @@ Shortread_read_fastq_gzip (int *nextchar, T *queryseq2,
 
       } else if (*nfiles == 1 || force_single_end_p == true) {
 	if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
-	  fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
-	  exit(9);
+	  fprintf(stderr,"Cannot open gzipped file %s => skipping.\n",(*files)[0]);
+	  (*files) += 1;
+	  (*nfiles) -= 1;
+	  *nextchar = EOF;
+	  return (T) NULL;
 	} else {
+	  debugf(fprintf(stderr,"Master opening input file 1\n"));
 #ifdef HAVE_ZLIB_GZBUFFER
 	  gzbuffer(*input1,GZBUFFER_SIZE);
 #endif
+	  *input2 = NULL;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
+	  *nextchar = '\0';	/* Was nextchar2 = '\0', which is incorrect */
 	}
-	*input2 = NULL;
-	(*files) += 1;
-	(*nfiles) -= 1;
-	*nextchar = '\0';	/* Was nextchar2 = '\0', which is incorrect */
 	
       } else {
-	if ((*input1 = gzopen((*files)[0],"rb")) == NULL) {
-	  fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[0]);
-	  exit(9);
-	} else {
-#ifdef HAVE_ZLIB_GZBUFFER
-	  gzbuffer(*input1,GZBUFFER_SIZE);
-#endif
+	while (*nfiles > 0 &&
+	       ((*input1 = gzopen((*files)[0],"rb")) == NULL ||
+		(*input2 = gzopen((*files)[1],"rb")) == NULL)) {
+	  fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+		  (*files)[0],(*files)[1]);
+	  (*files) += 2;
+	  (*nfiles) -= 2;
 	}
-
-	if ((*input2 = gzopen((*files)[1],"rb")) == NULL) {
-	  fprintf(stderr,"Cannot open gzipped file %s\n",(*files)[1]);
-	  exit(9);
+	if (*input1 == NULL) {
+	  *nextchar = EOF;
+	  return (T) NULL;
 	} else {
+	  debugf(fprintf(stderr,"Master opening input files 1 and 2\n"));
 #ifdef HAVE_ZLIB_GZBUFFER
+	  gzbuffer(*input1,GZBUFFER_SIZE);
 	  gzbuffer(*input2,GZBUFFER_SIZE);
 #endif
+	  (*files) += 2;
+	  (*nfiles) -= 2;
+	  *nextchar = '\0';
+	  if ((nextchar2 = Shortread_input_init_gzip(*input2)) == EOF) {
+	    return (T) NULL;
+	  }
 	}
-
-	(*files) += 2;
-	(*nfiles) -= 2;
-	*nextchar = '\0';
       }
     }
 
-
     if (*nextchar == '\0') {
       if ((*nextchar = Shortread_input_init_gzip(*input1)) == EOF) {
 	return (T) NULL;
@@ -4593,10 +4642,12 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
     queryseq1 = *queryseq2 = (T) NULL;
     if (*input1 == NULL || *nextchar == EOF) { /* Was bzeof(*input1) */
       if (*input1 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 1 using Bzip2_free\n"));
 	Bzip2_free(&(*input1));
 	*input1 = NULL;
       }
       if (*input2 != NULL) {
+	debugf(fprintf(stderr,"Master closing input 2 using Bzip2_free\n"));
 	Bzip2_free(&(*input2));
 	*input2 = NULL;
       }
@@ -4607,28 +4658,40 @@ Shortread_read_fastq_bzip2 (int *nextchar, T *queryseq2,
 
       } else if (*nfiles == 1 || force_single_end_p == true) {
 	if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
-	  fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
-	  exit(9);
+	  fprintf(stderr,"Can't open file %s => skipping.\n",(*files)[0]);
+	  (*files) += 1;
+	  (*nfiles) -= 1;
+	  *nextchar = EOF;
+	  return (T) NULL;
+	} else {
+	  debugf(fprintf(stderr,"Master opening input file 1\n"));
+	  *input2 = NULL;
+	  (*files) += 1;
+	  (*nfiles) -= 1;
+	  *nextchar = '\0';	/* Was nextchar2 = '\0', which is incorrect */
 	}
-	*input2 = NULL;
-	(*files) += 1;
-	(*nfiles) -= 1;
-	*nextchar = '\0';	/* Was nextchar2 = '\0', which is incorrect */
-	
+
       } else {
-	if ((*input1 = Bzip2_new((*files)[0])) == NULL) {
-	  fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[0]);
-	  exit(9);
+	while (*nfiles > 0 &&
+	       ((*input1 = Bzip2_new((*files)[0])) == NULL ||
+		(*input2 = Bzip2_new((*files)[1])) == NULL)) {
+	  fprintf(stderr,"Can't open file %s or %s => skipping both.\n",
+		  (*files)[0],(*files)[1]);
+	  (*files) += 2;
+	  (*nfiles) -= 2;
 	}
-
-	if ((*input2 = Bzip2_new((*files)[1])) == NULL) {
-	  fprintf(stderr,"Cannot open bzip2 file %s\n",(*files)[1]);
-	  exit(9);
+	if (*input1 == NULL) {
+	  *nextchar = EOF;
+	  return (T) NULL;
+	} else {
+	  debugf(fprintf(stderr,"Master opening input files 1 and 2\n"));
+	  (*files) += 2;
+	  (*nfiles) -= 2;
+	  *nextchar = '\0';
+	  if ((nextchar2 = Shortread_input_init_bzip2(*input2)) == EOF) {
+	    return (T) NULL;
+	  }
 	}
-
-	(*files) += 2;
-	(*nfiles) -= 2;
-	*nextchar = '\0';
       }
     }
 
diff --git a/src/stage3.c b/src/stage3.c
index dabb211..b13c3ab 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage3.c 195553 2016-08-02 17:32:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -8581,7 +8581,8 @@ traverse_dual_break (List_T pairs, List_T *path, Pair_T leftpair, Pair_T rightpa
     lastpair = (Pair_T) gappairs->first;
     firstpair = (Pair_T) List_last_value(gappairs);
     debug14(printf("gappairs goes from %d to %d\n",firstpair->querypos,lastpair->querypos));
-    if (firstpair->querypos == querydp5 && lastpair->querypos == querydp3) {
+    if (1 || (firstpair->querypos == querydp5 && lastpair->querypos == querydp3)) {
+      /* Note: We have to take this branch, otherwise we get unexpected comp errors */
       /* fprintf(stderr,"%d..%d .. %d..%d\n",querydp5,firstpair->querypos,lastpair->querypos,querydp3); */
       debug14(printf("  => entire query sequence bridged or not, but taking it regardless\n"));
       pairs = Pairpool_transfer(pairs,gappairs);
diff --git a/src/stage3hr.c b/src/stage3hr.c
index 0f617cf..006780a 100644
--- a/src/stage3hr.c
+++ b/src/stage3hr.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 195553 2016-08-02 17:32:39Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -66,6 +66,7 @@ static char rcsid[] = "$Id: stage3hr.c 193899 2016-07-12 04:41:34Z twu $";
 #define TERMINAL_COMPUTE_MINLENGTH 40
 #define SCORE_INDELS_EVENTRIM 1 /* Needed to compare genomic positions with and without indels */
 #define EVENTRIM_BADINTRON_PENALTY 2
+#define DO_FINAL 1
 
 #define OUTERLENGTH_SLOP 100
 
@@ -16488,7 +16489,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
   int n;
   int minscore5 = querylength5, minscore3 = querylength3, minscore = querylength5 + querylength3;
   int best_nsegments, best_nsegments_5, best_nsegments_3;
-  /* int max_nmatches = 0; */
+  int max_nmatches = 0;
 #ifdef USE_OPTIMAL_SCORE_BINGO
   int minscore_bingo = querylength5 + querylength3;
 #endif
@@ -16842,8 +16843,8 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
     }
 
   } else {
-    /* Final: Previously based purely on nmatches.  However, this leads to indelbreaks and bad introns. */
-#if 0
+#ifdef DO_FINAL
+    /* Final: based on nmatches. ? leads to indelbreaks and bad introns. But skipping this leads to nearly-identical alignments */
     max_nmatches = 0;
     for (p = hitpairlist; p != NULL; p = p->rest) {
       hitpair = (Stage3pair_T) p->first;
@@ -16858,6 +16859,10 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 	max_nmatches = hitpair->nmatches;
       }
     }
+
+    cutoff_level = max_nmatches - subopt_levels;
+    debug6(printf("cutoff level %d = max_nmatches %d - subopt %d\n",cutoff_level,max_nmatches,subopt_levels));
+
 #else
     for (p = hitpairlist; p != NULL; p = p->rest) {
       hitpair = (Stage3pair_T) p->first;
@@ -16901,6 +16906,15 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 		      hitpair,hittype_string(hitpair->hit5->hittype),hittype_string(hitpair->hit3->hittype),hitpair->score));
 	optimal = List_push(optimal,hitpair);
 
+#ifdef DO_FINAL
+      } else if (hitpair->nmatches < cutoff_level) {
+	debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with nmatches %d < cutoff_level %d (finalp %d)\n",
+		      hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
+		      hitpair->hit3->low - hitpair->hit3->chroffset,hitpair->hit3->high - hitpair->hit3->chroffset,
+		      hitpair->nmatches,cutoff_level,finalp));
+	*eliminatedp = true;
+	Stage3pair_free(&hitpair);
+#else
       } else if (hitpair->hit5->score_eventrim + hitpair->hit3->score_eventrim > cutoff_level) {
 	debug6(printf("Final: Eliminating hit pair %p at %u..%u|%u..%u with scores %d+%d > cutoff_level %d (finalp %d)\n",
 		      hitpair,hitpair->hit5->low - hitpair->hit5->chroffset,hitpair->hit5->high - hitpair->hit5->chroffset,
@@ -16908,6 +16922,7 @@ Stage3pair_optimal_score_aux (bool *eliminatedp, List_T hitpairlist,
 		      hitpair->hit5->score_eventrim,hitpair->hit3->score_eventrim,cutoff_level,finalp));
 	*eliminatedp = true;
 	Stage3pair_free(&hitpair);
+#endif
 
       } else {
 	debug6(printf("Final: Keeping hit pair %p with scores %d+%d (vs cutoff_level %d)\n",
diff --git a/util/gtf_genes.pl.in b/util/gtf_genes.pl.in
index 44e88b9..3ec4f0b 100644
--- a/util/gtf_genes.pl.in
+++ b/util/gtf_genes.pl.in
@@ -94,9 +94,9 @@ sub get_info {
     my $info = shift @_;
     my @desired_keys = @_;
     
-    foreach $item (@ {$info}) {
-	($key,$value) = $item =~ /(\S+) (.+)/;
-	foreach $desired_key (@desired_keys) {
+    foreach $desired_key (@desired_keys) {
+	foreach $item (@ {$info}) {
+	    ($key,$value) = $item =~ /(\S+) (.+)/;
 	    if ($key eq $desired_key) {
 		return $value;
 	    }
@@ -112,9 +112,9 @@ sub get_info_optional {
     my $info = shift @_;
     my @desired_keys = @_;
     
-    foreach $item (@ {$info}) {
-	($key,$value) = $item =~ /(\S+) (.+)/;
-	foreach $desired_key (@desired_keys) {
+    foreach $desired_key (@desired_keys) {
+	foreach $item (@ {$info}) {
+	    ($key,$value) = $item =~ /(\S+) (.+)/;
 	    if ($key eq $desired_key) {
 		return $value;
 	    }
diff --git a/util/gtf_introns.pl.in b/util/gtf_introns.pl.in
index ae3c364..232dc40 100755
--- a/util/gtf_introns.pl.in
+++ b/util/gtf_introns.pl.in
@@ -177,9 +177,9 @@ sub get_info {
     my $info = shift @_;
     my @desired_keys = @_;
     
-    foreach $item (@ {$info}) {
-	($key,$value) = $item =~ /(\S+) (.+)/;
-	foreach $desired_key (@desired_keys) {
+    foreach $desired_key (@desired_keys) {
+	foreach $item (@ {$info}) {
+	    ($key,$value) = $item =~ /(\S+) (.+)/;
 	    if ($key eq $desired_key) {
 		return $value;
 	    }
@@ -195,9 +195,9 @@ sub get_info_optional {
     my $info = shift @_;
     my @desired_keys = @_;
     
-    foreach $item (@ {$info}) {
-	($key,$value) = $item =~ /(\S+) (.+)/;
-	foreach $desired_key (@desired_keys) {
+    foreach $desired_key (@desired_keys) {
+	foreach $item (@ {$info}) {
+	    ($key,$value) = $item =~ /(\S+) (.+)/;
 	    if ($key eq $desired_key) {
 		return $value;
 	    }
diff --git a/util/gtf_splicesites.pl.in b/util/gtf_splicesites.pl.in
index e018f0e..d296a9c 100755
--- a/util/gtf_splicesites.pl.in
+++ b/util/gtf_splicesites.pl.in
@@ -177,9 +177,9 @@ sub get_info {
     my $info = shift @_;
     my @desired_keys = @_;
     
-    foreach $item (@ {$info}) {
-	($key,$value) = $item =~ /(\S+) (.+)/;
-	foreach $desired_key (@desired_keys) {
+    foreach $desired_key (@desired_keys) {
+	foreach $item (@ {$info}) {
+	    ($key,$value) = $item =~ /(\S+) (.+)/;
 	    if ($key eq $desired_key) {
 		return $value;
 	    }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git



More information about the debian-med-commit mailing list