[med-svn] [Git][med-team/gmap][upstream] 2 commits: New upstream version 2020-04-08+ds

Andreas Tille gitlab at salsa.debian.org
Sat Apr 11 20:59:00 BST 2020



Andreas Tille pushed to branch upstream at Debian Med / gmap


Commits:
ea8a10e9 by Andreas Tille at 2020-04-11T07:49:13+02:00
New upstream version 2020-04-08+ds
- - - - -
580578c8 by Andreas Tille at 2020-04-11T18:20:40+02:00
New upstream version 2020-04-08+ds1
- - - - -


30 changed files:

- ChangeLog
- VERSION
- configure.ac
- src/access.c
- src/access.h
- src/compress-write.c
- src/compress-write.h
- src/dynprog_end.c
- src/get-genome.c
- − src/getopt.c
- − src/getopt.h
- − src/getopt1.c
- src/gmap.c
- src/gmapindex.c
- src/gsnap.c
- src/iit-read-univ.c
- src/iit-read.c
- src/iit_get.c
- src/iit_store.c
- src/indexdb-cat.c
- src/output.c
- src/output.h
- src/pair.c
- src/pair.h
- src/regiondb-write.c
- src/stage3hr.c
- src/stage3hrdef.h
- src/substring.c
- src/substring.h
- util/gmap_cat.pl.in


Changes:

=====================================
ChangeLog
=====================================
The diff for this file was not included because it is too large.

=====================================
VERSION
=====================================
@@ -1 +1 @@
-2020-03-12
\ No newline at end of file
+2020-04-08
\ No newline at end of file


=====================================
configure.ac
=====================================
@@ -390,7 +390,7 @@ else
 AX_CPUID_NON_INTEL
 fi
 
-AX_EXT
+AX_EXT   # Sets SIMD_CFLAGS, and HAVE_ALTIVEC,HAVE_MMX,HAVE_SSE,HAVE_SSE2,...
 if test "x$ax_cv_want_simd" = xno; then
   compile_level=none
 elif test "x$ax_make_avx512bw" = xyes; then


=====================================
src/access.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 222113 2020-03-06 18:05:33Z twu $";
+static char rcsid[] = "$Id: access.c 222390 2020-04-10 12:44:01Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1014,8 +1014,8 @@ Access_mmap (int *fd, size_t *len, double *seconds, char *filename, bool randomp
   } else {
     *len = length;
     memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
-		  |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+		  |MAP_PRIVATE
 #endif
 #ifdef HAVE_MMAP_MAP_FILE
 		  |MAP_FILE
@@ -1086,8 +1086,8 @@ Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool r
     memory = (void *) NULL;
   } else {
     memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
-		  |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+		  |MAP_PRIVATE
 #endif
 #ifdef HAVE_MMAP_MAP_FILE
 		  |MAP_FILE
@@ -1127,7 +1127,8 @@ Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool r
 #endif
 
 
-
+#if 0
+/* All programs are read-only */
 #ifdef HAVE_MMAP
 /* Returns NULL if mmap fails.  Bigendian conversion required */
 #ifdef HAVE_CADDR_T
@@ -1196,7 +1197,11 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, bool randomp) {
   return memory;
 }
 #endif
+#endif	/* if 0 */
 
+
+#if 0
+/* All programs are read-only */
 #ifdef HAVE_MMAP
 /* Returns NULL if mmap fails.  Bigendian conversion required */
 #ifdef HAVE_CADDR_T
@@ -1263,7 +1268,7 @@ Access_mmap_offset_rw (int *remainder, int fd, size_t offset, size_t length, boo
   return memory;
 }
 #endif
-
+#endif	/* if 0 */
 
 
 
@@ -1314,8 +1319,8 @@ Access_mmap_and_preload (int *fd, size_t *len, int *npages, double *seconds, cha
     Stopwatch_start(stopwatch = Stopwatch_new());
 
     memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
-		  |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+		  |MAP_PRIVATE
 #endif
 #ifdef HAVE_MMAP_MAP_FILE
 		  |MAP_FILE


=====================================
src/access.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: access.h 222113 2020-03-06 18:05:33Z twu $ */
+/* $Id: access.h 222390 2020-04-10 12:44:01Z twu $ */
 #ifndef ACCESS_INCLUDED
 #define ACCESS_INCLUDED
 #ifdef HAVE_CONFIG_H
@@ -73,20 +73,6 @@ extern void *
 #endif
 Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool randomp);
 
-#ifdef HAVE_CADDR_T
-extern caddr_t
-#else
-extern void *
-#endif
-Access_mmap_rw (int *fd, size_t *len, char *filename, bool randomp);
-
-#ifdef HAVE_CADDR_T
-extern caddr_t
-#else
-extern void *
-#endif
-Access_mmap_offset_rw (int *remainder, int fd, size_t offset, size_t length, bool randomp);
-
 #ifdef HAVE_CADDR_T
 extern caddr_t
 #else


=====================================
src/compress-write.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: compress-write.c 221920 2020-02-20 02:05:50Z twu $";
+static char rcsid[] = "$Id: compress-write.c 222312 2020-04-05 16:57:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -25,6 +25,8 @@ static char rcsid[] = "$Id: compress-write.c 221920 2020-02-20 02:05:50Z twu $";
 #endif
 #include "complement.h"
 #include "mem.h"		/* For Compress_new */
+#include "getline.h"
+
 
 /* Compress_cat */
 #ifdef DEBUG1
@@ -526,6 +528,123 @@ Compress_update_memory (int nbadchars, Genomecomp_T *genomecomp, char *gbuffer,
 }
 
 
+void
+Compress_compress (char **files, int nfiles, bool stdin_p) {
+  Genomecomp_T low = 0U, high = 0U, flags = 0U, carry;
+  Univcoord_T position = 0UL;
+  FILE *fp;
+  char *line, *p;
+  int c;
+  int in_counter = 0, filei;
+
+
+  fprintf(stderr,"Compressing genome");
+
+  if (stdin_p == true) {
+    nfiles = 1;
+  }
+  for (filei = 0; filei < nfiles; filei++) {
+    if (stdin_p == true) {
+      fp = stdin;
+    } else if ((fp = fopen(files[filei],"r")) == NULL) {
+      fprintf(stderr,"Could not open file %s\n",files[filei]);
+      exit(9);
+    }
+
+    while ((line = Getline(fp)) != NULL) {
+      if (line[0] == '>') {
+	/* Skip header */
+      } else {
+	p = line;
+	while ((c = (int) *p++) != '\0') {
+	  if (isalpha(c)) {
+	    in_counter++;
+
+	    carry = high & 3U;
+	    high >>= 2;
+	    low >>= 2;
+	    flags >>= 1;
+	    switch (carry) {
+	    case 0U: break;
+	    case 1U: low |= UINT4_LEFT_C; break;
+	    case 2U: low |= UINT4_LEFT_G; break;
+	    case 3U: low |= UINT4_LEFT_T; break;
+	    default: abort();
+	    }
+
+	    switch (uppercaseCode[c]) {
+	    case 'A': break;
+	    case 'C': high |= UINT4_LEFT_C; break;
+	    case 'G': high |= UINT4_LEFT_G; break;
+	    case 'T': high |= UINT4_LEFT_T; break;
+	    case 'N': flags |= UINT4_LEFT_BIT; break;
+	    case 'X': high |= UINT4_LEFT_T; flags |= UINT4_LEFT_BIT; break;
+	    default: 
+	      fprintf(stderr,"Non-standard nucleotide %c at position %lu.  Using N instead\n",c,position);
+	      flags |= UINT4_LEFT_BIT;
+	      break;
+	    }
+      
+	    /* 8 is simply bits per byte */
+	    if (in_counter == 8 * (int) sizeof(Genomecomp_T)) {
+	      FWRITE_UINT(high,stdout);
+	      FWRITE_UINT(low,stdout);
+	      FWRITE_UINT(flags,stdout);
+	    
+	      low = high = flags = 0U;
+	      in_counter = 0;
+	    }
+
+	    position++;
+	    if (position % MONITOR_INTERVAL == 0) {
+	      fprintf(stderr,".",position);
+	    }
+	  }
+	}
+      }
+
+      FREE(line);
+    }
+
+    if (stdin_p == false) {
+      fclose(fp);
+    }
+  }
+
+  if (in_counter > 0) {
+    while (in_counter < 8 * (int) sizeof(Genomecomp_T)) {
+      carry = high & 3U;
+      high >>= 2;
+      low >>= 2;
+      flags >>= 1;
+      switch (carry) {
+      case 0U: break;
+      case 1U: low |= UINT4_LEFT_C; break;
+      case 2U: low |= UINT4_LEFT_G; break;
+      case 3U: low |= UINT4_LEFT_T; break;
+      default: abort();
+      }
+      high |= UINT4_LEFT_T; flags |= UINT4_LEFT_BIT; /* Create X's at end */
+      in_counter++;
+    }
+
+    FWRITE_UINT(high,stdout);
+    FWRITE_UINT(low,stdout);
+    FWRITE_UINT(flags,stdout);
+  }
+
+  /* Plus extra high and low */
+  high = low = 0xFFFFFFFF;
+  FWRITE_UINT(high,stdout);
+  FWRITE_UINT(low,stdout);
+
+  fprintf(stderr,"\n");
+
+  return;
+}
+
+
+
 #ifdef HAVE_64_BIT
 static inline void
 nt_unshuffle (UINT4 *highbits, UINT4 *lowbits, UINT4 high, UINT4 low) {
@@ -713,7 +832,7 @@ void
 Compress_cat (FILE *out, char **files, Univcoord_T *genomelengths, int nfiles) {
   FILE *fp;
   char *file;
-  int current_pos, bufferlen, shift;
+  int current_pos = 0, bufferlen, shift;
   UINT4 Buffer[3];
 #ifdef HAVE_64_BIT
   UINT8 current_highlow = 0, buffer_highlow = 0;


=====================================
src/compress-write.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: compress-write.h 221681 2020-02-12 03:28:08Z twu $ */
+/* $Id: compress-write.h 222204 2020-03-23 22:40:15Z twu $ */
 #ifndef COMPRESS_WRITE_INCLUDED
 #define COMPRESS_WRITE_INCLUDED
 
@@ -17,6 +17,8 @@ extern int
 Compress_update_memory (int nbadchars, Genomecomp_T *genomecomp, char *gbuffer, Univcoord_T startpos,
 			Univcoord_T endpos);
 extern void
+Compress_compress (char **files, int nfiles, bool stdin_p);
+extern void
 Compress_unshuffle (FILE *out, FILE *in);
 extern void
 Compress_unshuffle_bits128 (FILE *out, FILE *in);


=====================================
src/dynprog_end.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 222316 2020-04-05 18:18:36Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -580,7 +580,7 @@ find_best_endpoint_to_queryend_nogaps (int *bestr, int *bestc, int rlength, int
 
 static char complCode[128] = COMPLEMENT_LC;
 
-#if 0
+#ifdef DEBUG_SIMD
 static char
 get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
 		bool watsonp) {


=====================================
src/get-genome.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 215487 2018-05-26 03:18:11Z twu $";
+static char rcsid[] = "$Id: get-genome.c 222304 2020-04-02 22:17:13Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -79,6 +79,7 @@ static bool codingp = false;
 
 /* Dump options */
 static bool dumpallp = false;
+static bool add_circular_p = false;
 static bool stream_chars_p = false;
 static bool stream_ints_p = false;
 static bool dumpchrp = false;
@@ -117,6 +118,7 @@ static struct option long_options[] = {
 
   /* Dump options */
   {"dump", no_argument, 0, 'A'},	/* dumpallp */
+  {"add-circular", no_argument, 0, 0}, /* add_circular_p */
   {"stream-chars", no_argument, 0, 0},	/* stream_chars_p */
   {"chromosomes", no_argument, 0, 'L'},	/* dumpchrp */
   {"forsam", no_argument, 0, 0},	/* dumpchr_forsam_p */
@@ -415,7 +417,7 @@ index_compare (const void *a, const void *b) {
 
 static void
 print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, Chrpos_T genomiclength,
-		Univ_IIT_T chromosome_iit, bool whole_chromosome_p) {
+		Univ_IIT_T chromosome_iit, bool whole_chromosome_p, bool circular_part_p) {
   char *chromosome1, *chromosome2, *ptr, c;
   Sequence_T genomicseg, genomicseg_alt, genomicseg_snp;
   Chrpos_T chrpos;
@@ -429,6 +431,8 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
     /* Don't print a header */
   } else if (header != NULL) {
     printf(">%s\n",header);
+  } else if (circular_part_p == true) {
+    /* Don't print a header */
   } else if (revcomp == true) {
     printf(">");
     chromosome1 = Univ_IIT_string_from_position(&chrpos,genomicstart+genomiclength-1,chromosome_iit);
@@ -1279,12 +1283,15 @@ main (int argc, char *argv[]) {
       } else if (!strcmp(long_name,"stream-ints")) {
 	stream_ints_p = true;
 
+      } else if (!strcmp(long_name,"add-circular")) {
+	add_circular_p = true;
+
       } else if (!strcmp(long_name,"coding")) {
 	codingp = true;
 
       } else {
 	/* Shouldn't reach here */
-	fprintf(stderr,"Don't recognize option %s.  For usage, run 'get-genome --help'",long_name);
+	fprintf(stderr,"Don't recognize option %s.  For usage, run 'get-genome --help'\n",long_name);
 	exit(9);
       }
       break;
@@ -1364,11 +1371,11 @@ main (int argc, char *argv[]) {
       if (Parserange_universal(&segment,&revcomp,&whole_chromosome_p,&genomicstart,&genomiclength,&chrstart,&chrend,
 			       &chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
 	print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
-		       /*whole_chromosome_p*/true);
+		       /*whole_chromosome_p*/true,/*circular_part_p*/false);
 	if (circularp[indx] == true) {
 	  /* Print again, since internal genome represents circular chromosomes twice */
 	  print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
-			 /*whole_chromosome_p*/true);
+			 /*whole_chromosome_p*/true,/*circular_part_p*/true);
 	}
       }
       FREE(segment);
@@ -1486,6 +1493,9 @@ main (int argc, char *argv[]) {
       chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
       FREE(iitfile);
 
+      /* Needed if --with-circular is specified */
+      circularp = Univ_IIT_circularp(&any_circular_p,chromosome_iit);
+
       if (snps_root == NULL || print_snps_mode == 0) {
 	genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
 			    uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
@@ -1509,7 +1519,12 @@ main (int argc, char *argv[]) {
 	if (Parserange_universal(&segment,&revcomp,&whole_chromosome_p,&genomicstart,&genomiclength,&chrstart,&chrend,
 				 &chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
 	  print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
-			 /*whole_chromosome_p*/true);
+			 /*whole_chromosome_p*/true,/*circular_part_p*/false);
+	  if (add_circular_p == true && circularp[indx] == true) {
+	    /* Print again, since internal genome represents circular chromosomes twice */
+	    print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
+			   /*whole_chromosome_p*/true,/*circular_part_p*/true);
+	  }
 	}
 	FREE(segment);
 	FREE(with_colon);
@@ -1690,7 +1705,7 @@ main (int argc, char *argv[]) {
 	debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
 		     argv[0],(unsigned long long) genomicstart,genomiclength,revcomp));
 	print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
-		       whole_chromosome_p);
+		       whole_chromosome_p,/*circular_part_p*/false);
       }
       FREE(segment);
 
@@ -1989,7 +2004,7 @@ main (int argc, char *argv[]) {
 	  
 	} else {
 	  print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
-			 /*whole_chromosome_p*/false);
+			 /*whole_chromosome_p*/false,/*circular_part_p*/false);
 	}
 
 	FREE(divstring2);


=====================================
src/getopt.c deleted
=====================================
@@ -1,1262 +0,0 @@
-/* Getopt for GNU.
-   NOTE: getopt is now part of the C library, so if you don't know what
-   "Keep this file name-space clean" means, talk to drepper at gnu.org
-   before changing it!
-   Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002
-   	Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
-   Ditto for AIX 3.2 and <stdlib.h>.  */
-#ifndef _NO_PROTO
-# define _NO_PROTO
-#endif
-
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if !defined __STDC__ || !__STDC__
-/* This is a separate conditional since some stdc systems
-   reject `defined (const)'.  */
-# ifndef const
-#  define const
-# endif
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
-   actually compiling the library itself.  This code is part of the GNU C
-   Library, but also included in many other GNU distributions.  Compiling
-   and linking in this code is a waste when using the GNU C library
-   (especially if it is a shared library).  Rather than having every GNU
-   program understand `configure --with-gnu-libc' and omit the object files,
-   it is simpler to just do this in the source for each such file.  */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-# include <gnu-versions.h>
-# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-#  define ELIDE_CODE
-# endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
-   to get __GNU_LIBRARY__ defined.  */
-#ifdef	__GNU_LIBRARY__
-/* Don't include stdlib.h for non-GNU C libraries because some of them
-   contain conflicting prototypes for getopt.  */
-# include <stdlib.h>
-# include <unistd.h>
-#endif	/* GNU C library.  */
-
-#ifdef VMS
-# include <unixlib.h>
-# if HAVE_STRING_H - 0
-#  include <string.h>
-# endif
-#endif
-
-#ifndef attribute_hidden
-# define attribute_hidden
-#endif
-
-/* This version of `getopt' appears to the caller like standard Unix `getopt'
-   but it behaves differently for the user, since it allows the user
-   to intersperse the options with the other arguments.
-
-   As `getopt' works, it permutes the elements of ARGV so that,
-   when it is done, all the options precede everything else.  Thus
-   all application programs are extended to handle flexible argument order.
-
-   Setting the environment variable POSIXLY_CORRECT disables permutation..
-   Then the behavior is completely standard.
-
-   GNU application programs can use a third alternative mode in which
-   they can distinguish the relative order of options and other arguments.  */
-
-#include "getopt.h"
-
-/* For communication from `getopt' to the caller.
-   When `getopt' finds an option that takes an argument,
-   the argument value is returned here.
-   Also, when `ordering' is RETURN_IN_ORDER,
-   each non-option ARGV-element is returned here.  */
-
-char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
-   This is used for communication to and from the caller
-   and for communication between successive calls to `getopt'.
-
-   On entry to `getopt', zero means this is the first call; initialize.
-
-   When `getopt' returns -1, this is the index of the first of the
-   non-option elements that the caller should itself scan.
-
-   Otherwise, `optind' communicates from one call to the next
-   how much of ARGV has been scanned so far.  */
-
-/* 1003.2 says this must be 1 before any call.  */
-int optind = 1;
-
-/* Formerly, initialization of getopt depended on optind==0, which
-   causes problems with re-calling getopt as programs generally don't
-   know that. */
-
-int __getopt_initialized attribute_hidden;
-
-/* The next char to be scanned in the option-element
-   in which the last option character we returned was found.
-   This allows us to pick up the scan where we left off.
-
-   If this is zero, or a null string, it means resume the scan
-   by advancing to the next ARGV-element.  */
-
-static char *nextchar;
-
-/* Callers store zero here to inhibit the error message
-   for unrecognized options.  */
-
-int opterr = 1;
-
-/* Set to an option character which was unrecognized.
-   This must be initialized on some systems to avoid linking in the
-   system's own getopt implementation.  */
-
-int optopt = '?';
-
-/* Describe how to deal with options that follow non-option ARGV-elements.
-
-   If the caller did not specify anything,
-   the default is REQUIRE_ORDER if the environment variable
-   POSIXLY_CORRECT is defined, PERMUTE otherwise.
-
-   REQUIRE_ORDER means don't recognize them as options;
-   stop option processing when the first non-option is seen.
-   This is what Unix does.
-   This mode of operation is selected by either setting the environment
-   variable POSIXLY_CORRECT, or using `+' as the first character
-   of the list of option characters.
-
-   PERMUTE is the default.  We permute the contents of ARGV as we scan,
-   so that eventually all the non-options are at the end.  This allows options
-   to be given in any order, even with programs that were not written to
-   expect this.
-
-   RETURN_IN_ORDER is an option available to programs that were written
-   to expect options and other ARGV-elements in any order and that care about
-   the ordering of the two.  We describe each non-option ARGV-element
-   as if it were the argument of an option with character code 1.
-   Using `-' as the first character of the list of option characters
-   selects this mode of operation.
-
-   The special argument `--' forces an end of option-scanning regardless
-   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
-   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
-
-static enum
-{
-  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
-} ordering;
-
-/* Value of POSIXLY_CORRECT environment variable.  */
-static char *posixly_correct;
-
-#ifdef	__GNU_LIBRARY__
-/* We want to avoid inclusion of string.h with non-GNU libraries
-   because there are many ways it can cause trouble.
-   On some systems, it contains special magic macros that don't work
-   in GCC.  */
-# include <string.h>
-# define my_index	strchr
-#else
-
-# if HAVE_STRING_H
-#  include <string.h>
-# else
-#  include <strings.h>
-# endif
-
-/* Avoid depending on library functions or files
-   whose names are inconsistent.  */
-
-#ifndef getenv
-extern char *getenv ();
-#endif
-
-static char *
-my_index (str, chr)
-     const char *str;
-     int chr;
-{
-  while (*str)
-    {
-      if (*str == chr)
-	return (char *) str;
-      str++;
-    }
-  return 0;
-}
-
-/* If using GCC, we can safely declare strlen this way.
-   If not using GCC, it is ok not to declare it.  */
-#ifdef __GNUC__
-/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
-   That was relevant to code that was here before.  */
-# if (!defined __STDC__ || !__STDC__) && !defined strlen
-/* gcc with -traditional declares the built-in strlen to return int,
-   and has done so at least since version 2.4.5. -- rms.  */
-extern int strlen (const char *);
-# endif /* not __STDC__ */
-#endif /* __GNUC__ */
-
-#endif /* not __GNU_LIBRARY__ */
-
-/* Handle permutation of arguments.  */
-
-/* Describe the part of ARGV that contains non-options that have
-   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
-   `last_nonopt' is the index after the last of them.  */
-
-static int first_nonopt;
-static int last_nonopt;
-
-#ifdef _LIBC
-/* Stored original parameters.
-   XXX This is no good solution.  We should rather copy the args so
-   that we can compare them later.  But we must not use malloc(3).  */
-extern int __libc_argc;
-extern char **__libc_argv;
-
-/* Bash 2.0 gives us an environment variable containing flags
-   indicating ARGV elements that should not be considered arguments.  */
-
-# ifdef USE_NONOPTION_FLAGS
-/* Defined in getopt_init.c  */
-extern char *__getopt_nonoption_flags;
-
-static int nonoption_flags_max_len;
-static int nonoption_flags_len;
-# endif
-
-# ifdef USE_NONOPTION_FLAGS
-#  define SWAP_FLAGS(ch1, ch2) \
-  if (nonoption_flags_len > 0)						      \
-    {									      \
-      char __tmp = __getopt_nonoption_flags[ch1];			      \
-      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];	      \
-      __getopt_nonoption_flags[ch2] = __tmp;				      \
-    }
-# else
-#  define SWAP_FLAGS(ch1, ch2)
-# endif
-#else	/* !_LIBC */
-# define SWAP_FLAGS(ch1, ch2)
-#endif	/* _LIBC */
-
-/* Exchange two adjacent subsequences of ARGV.
-   One subsequence is elements [first_nonopt,last_nonopt)
-   which contains all the non-options that have been skipped so far.
-   The other is elements [last_nonopt,optind), which contains all
-   the options processed since those non-options were skipped.
-
-   `first_nonopt' and `last_nonopt' are relocated so that they describe
-   the new indices of the non-options in ARGV after they are moved.  */
-
-#if defined __STDC__ && __STDC__
-static void exchange (char **);
-#endif
-
-static void
-exchange (argv)
-     char **argv;
-{
-  int bottom = first_nonopt;
-  int middle = last_nonopt;
-  int top = optind;
-  char *tem;
-
-  /* Exchange the shorter segment with the far end of the longer segment..
-     That puts the shorter segment into the right place.
-     It leaves the longer segment in the right place overall,
-     but it consists of two parts that need to be swapped next.  */
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-  /* First make sure the handling of the `__getopt_nonoption_flags'
-     string can work normally.  Our top argument must be in the range
-     of the string.  */
-  if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
-    {
-      /* We must extend the array.  The user plays games with us and
-	 presents new arguments.  */
-      char *new_str = malloc (top + 1);
-      if (new_str == NULL)
-	nonoption_flags_len = nonoption_flags_max_len = 0;
-      else
-	{
-	  memset (__mempcpy (new_str, __getopt_nonoption_flags,
-			     nonoption_flags_max_len),
-		  '\0', top + 1 - nonoption_flags_max_len);
-	  nonoption_flags_max_len = top + 1;
-	  __getopt_nonoption_flags = new_str;
-	}
-    }
-#endif
-
-  while (top > middle && middle > bottom)
-    {
-      if (top - middle > middle - bottom)
-	{
-	  /* Bottom segment is the short one.  */
-	  int len = middle - bottom;
-	  register int i;
-
-	  /* Swap it with the top part of the top segment.  */
-	  for (i = 0; i < len; i++)
-	    {
-	      tem = argv[bottom + i];
-	      argv[bottom + i] = argv[top - (middle - bottom) + i];
-	      argv[top - (middle - bottom) + i] = tem;
-	      SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
-	    }
-	  /* Exclude the moved bottom segment from further swapping.  */
-	  top -= len;
-	}
-      else
-	{
-	  /* Top segment is the short one.  */
-	  int len = top - middle;
-	  register int i;
-
-	  /* Swap it with the bottom part of the bottom segment.  */
-	  for (i = 0; i < len; i++)
-	    {
-	      tem = argv[bottom + i];
-	      argv[bottom + i] = argv[middle + i];
-	      argv[middle + i] = tem;
-	      SWAP_FLAGS (bottom + i, middle + i);
-	    }
-	  /* Exclude the moved top segment from further swapping.  */
-	  bottom += len;
-	}
-    }
-
-  /* Update records for the slots the non-options now occupy.  */
-
-  first_nonopt += (optind - last_nonopt);
-  last_nonopt = optind;
-}
-
-/* Initialize the internal data when the first call is made.  */
-
-#if defined __STDC__ && __STDC__
-static const char *_getopt_initialize (int, char *const *, const char *);
-#endif
-static const char *
-_getopt_initialize (argc, argv, optstring)
-     int argc;
-     char *const *argv;
-     const char *optstring;
-{
-  /* Start processing options with ARGV-element 1 (since ARGV-element 0
-     is the program name); the sequence of previously skipped
-     non-option ARGV-elements is empty.  */
-
-  first_nonopt = last_nonopt = optind;
-
-  nextchar = NULL;
-
-  posixly_correct = getenv ("POSIXLY_CORRECT");
-
-  /* Determine how to handle the ordering of options and nonoptions.  */
-
-  if (optstring[0] == '-')
-    {
-      ordering = RETURN_IN_ORDER;
-      ++optstring;
-    }
-  else if (optstring[0] == '+')
-    {
-      ordering = REQUIRE_ORDER;
-      ++optstring;
-    }
-  else if (posixly_correct != NULL)
-    ordering = REQUIRE_ORDER;
-  else
-    ordering = PERMUTE;
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-  if (posixly_correct == NULL
-      && argc == __libc_argc && argv == __libc_argv)
-    {
-      if (nonoption_flags_max_len == 0)
-	{
-	  if (__getopt_nonoption_flags == NULL
-	      || __getopt_nonoption_flags[0] == '\0')
-	    nonoption_flags_max_len = -1;
-	  else
-	    {
-	      const char *orig_str = __getopt_nonoption_flags;
-	      int len = nonoption_flags_max_len = strlen (orig_str);
-	      if (nonoption_flags_max_len < argc)
-		nonoption_flags_max_len = argc;
-	      __getopt_nonoption_flags =
-		(char *) malloc (nonoption_flags_max_len);
-	      if (__getopt_nonoption_flags == NULL)
-		nonoption_flags_max_len = -1;
-	      else
-		memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
-			'\0', nonoption_flags_max_len - len);
-	    }
-	}
-      nonoption_flags_len = nonoption_flags_max_len;
-    }
-  else
-    nonoption_flags_len = 0;
-#endif
-
-  return optstring;
-}
-
-/* Scan elements of ARGV (whose length is ARGC) for option characters
-   given in OPTSTRING.
-
-   If an element of ARGV starts with '-', and is not exactly "-" or "--",
-   then it is an option element.  The characters of this element
-   (aside from the initial '-') are option characters.  If `getopt'
-   is called repeatedly, it returns successively each of the option characters
-   from each of the option elements.
-
-   If `getopt' finds another option character, it returns that character,
-   updating `optind' and `nextchar' so that the next call to `getopt' can
-   resume the scan with the following option character or ARGV-element.
-
-   If there are no more option characters, `getopt' returns -1.
-   Then `optind' is the index in ARGV of the first ARGV-element
-   that is not an option.  (The ARGV-elements have been permuted
-   so that those that are not options now come last.)
-
-   OPTSTRING is a string containing the legitimate option characters.
-   If an option character is seen that is not listed in OPTSTRING,
-   return '?' after printing an error message.  If you set `opterr' to
-   zero, the error message is suppressed but we still return '?'.
-
-   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
-   so the following text in the same ARGV-element, or the text of the following
-   ARGV-element, is returned in `optarg'.  Two colons mean an option that
-   wants an optional arg; if there is text in the current ARGV-element,
-   it is returned in `optarg', otherwise `optarg' is set to zero.
-
-   If OPTSTRING starts with `-' or `+', it requests different methods of
-   handling the non-option ARGV-elements.
-   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
-
-   Long-named options begin with `--' instead of `-'.
-   Their names may be abbreviated as long as the abbreviation is unique
-   or is an exact match for some defined option.  If they have an
-   argument, it follows the option name in the same ARGV-element, separated
-   from the option name by a `=', or else the in next ARGV-element.
-   When `getopt' finds a long-named option, it returns 0 if that option's
-   `flag' field is nonzero, the value of the option's `val' field
-   if the `flag' field is zero.
-
-   The elements of ARGV aren't really const, because we permute them.
-   But we pretend they're const in the prototype to be compatible
-   with other systems.
-
-   LONGOPTS is a vector of `struct option' terminated by an
-   element containing a name which is zero.
-
-   LONGIND returns the index in LONGOPT of the long-named option found.
-   It is only valid when a long-named option has been found by the most
-   recent call.
-
-   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
-   long-named options.  */
-
-int
-_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
-     int argc;
-     char *const *argv;
-     const char *optstring;
-     const struct option *longopts;
-     int *longind;
-     int long_only;
-{
-  int print_errors = opterr;
-  if (optstring[0] == ':')
-    print_errors = 0;
-
-  if (argc < 1)
-    return -1;
-
-  optarg = NULL;
-
-  if (optind == 0 || !__getopt_initialized)
-    {
-      if (optind == 0)
-	optind = 1;	/* Don't scan ARGV[0], the program name.  */
-      optstring = _getopt_initialize (argc, argv, optstring);
-      __getopt_initialized = 1;
-    }
-
-  /* Test whether ARGV[optind] points to a non-option argument.
-     Either it does not have option syntax, or there is an environment flag
-     from the shell indicating it is not an option.  The later information
-     is only used when the used in the GNU libc.  */
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'	      \
-		      || (optind < nonoption_flags_len			      \
-			  && __getopt_nonoption_flags[optind] == '1'))
-#else
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
-#endif
-
-  if (nextchar == NULL || *nextchar == '\0')
-    {
-      /* Advance to the next ARGV-element.  */
-
-      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
-	 moved back by the user (who may also have changed the arguments).  */
-      if (last_nonopt > optind)
-	last_nonopt = optind;
-      if (first_nonopt > optind)
-	first_nonopt = optind;
-
-      if (ordering == PERMUTE)
-	{
-	  /* If we have just processed some options following some non-options,
-	     exchange them so that the options come first.  */
-
-	  if (first_nonopt != last_nonopt && last_nonopt != optind)
-	    exchange ((char **) argv);
-	  else if (last_nonopt != optind)
-	    first_nonopt = optind;
-
-	  /* Skip any additional non-options
-	     and extend the range of non-options previously skipped.  */
-
-	  while (optind < argc && NONOPTION_P)
-	    optind++;
-	  last_nonopt = optind;
-	}
-
-      /* The special ARGV-element `--' means premature end of options.
-	 Skip it like a null option,
-	 then exchange with previous non-options as if it were an option,
-	 then skip everything else like a non-option.  */
-
-      if (optind != argc && !strcmp (argv[optind], "--"))
-	{
-	  optind++;
-
-	  if (first_nonopt != last_nonopt && last_nonopt != optind)
-	    exchange ((char **) argv);
-	  else if (first_nonopt == last_nonopt)
-	    first_nonopt = optind;
-	  last_nonopt = argc;
-
-	  optind = argc;
-	}
-
-      /* If we have done all the ARGV-elements, stop the scan
-	 and back over any non-options that we skipped and permuted.  */
-
-      if (optind == argc)
-	{
-	  /* Set the next-arg-index to point at the non-options
-	     that we previously skipped, so the caller will digest them.  */
-	  if (first_nonopt != last_nonopt)
-	    optind = first_nonopt;
-	  return -1;
-	}
-
-      /* If we have come to a non-option and did not permute it,
-	 either stop the scan or describe it to the caller and pass it by.  */
-
-      if (NONOPTION_P)
-	{
-	  if (ordering == REQUIRE_ORDER)
-	    return -1;
-	  optarg = argv[optind++];
-	  return 1;
-	}
-
-      /* We have found another option-ARGV-element.
-	 Skip the initial punctuation.  */
-
-      nextchar = (argv[optind] + 1
-		  + (longopts != NULL && argv[optind][1] == '-'));
-    }
-
-  /* Decode the current option-ARGV-element.  */
-
-  /* Check whether the ARGV-element is a long option.
-
-     If long_only and the ARGV-element has the form "-f", where f is
-     a valid short option, don't consider it an abbreviated form of
-     a long option that starts with f.  Otherwise there would be no
-     way to give the -f short option.
-
-     On the other hand, if there's a long option "fubar" and
-     the ARGV-element is "-fu", do consider that an abbreviation of
-     the long option, just like "--fu", and not "-f" with arg "u".
-
-     This distinction seems to be the most useful approach.  */
-
-  if (longopts != NULL
-      && (argv[optind][1] == '-'
-	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
-    {
-      char *nameend;
-      const struct option *p;
-      const struct option *pfound = NULL;
-      int exact = 0;
-      int ambig = 0;
-      int indfound = -1;
-      int option_index;
-
-      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
-	/* Do nothing.  */ ;
-
-      /* Test all long options for either exact match
-	 or abbreviated matches.  */
-      for (p = longopts, option_index = 0; p->name; p++, option_index++)
-	if (!strncmp (p->name, nextchar, nameend - nextchar))
-	  {
-	    if ((unsigned int) (nameend - nextchar)
-		== (unsigned int) strlen (p->name))
-	      {
-		/* Exact match found.  */
-		pfound = p;
-		indfound = option_index;
-		exact = 1;
-		break;
-	      }
-	    else if (pfound == NULL)
-	      {
-		/* First nonexact match found.  */
-		pfound = p;
-		indfound = option_index;
-	      }
-	    else if (long_only
-		     || pfound->has_arg != p->has_arg
-		     || pfound->flag != p->flag
-		     || pfound->val != p->val)
-	      /* Second or later nonexact match found.  */
-	      ambig = 1;
-	  }
-
-      if (ambig && !exact)
-	{
-	  if (print_errors)
-	    {
-#if defined _LIBC && defined USE_IN_LIBIO
-	      char *buf;
-
-	      if (__asprintf (&buf, "%s: option `%s' is ambiguous\n",
-			      argv[0], argv[optind]) >= 0)
-		{
-
-		  if (_IO_fwide (stderr, 0) > 0)
-		    __fwprintf (stderr, L"%s", buf);
-		  else
-		    fputs (buf, stderr);
-
-		  free (buf);
-		}
-#else
-	      fprintf (stderr, "%s: option `%s' is ambiguous\n",
-		       argv[0], argv[optind]);
-#endif
-	    }
-	  nextchar += strlen (nextchar);
-	  optind++;
-	  optopt = 0;
-	  return '?';
-	}
-
-      if (pfound != NULL)
-	{
-	  option_index = indfound;
-	  optind++;
-	  if (*nameend)
-	    {
-	      /* Don't test has_arg with >, because some C compilers don't
-		 allow it to be used on enums.  */
-	      if (pfound->has_arg)
-		optarg = nameend + 1;
-	      else
-		{
-		  if (print_errors)
-		    {
-#if defined _LIBC && defined USE_IN_LIBIO
-		      char *buf;
-		      int n;
-#endif
-
-		      if (argv[optind - 1][1] == '-')
-			{
-			  /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
-			  n = __asprintf (&buf, "\
-%s: option `--%s' doesn't allow an argument\n",
-					  argv[0], pfound->name);
-#else
-			  fprintf (stderr, "\
-%s: option `--%s' doesn't allow an argument\n",
-				   argv[0], pfound->name);
-#endif
-			}
-		      else
-			{
-			  /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
-			  n = __asprintf (&buf, "\
-%s: option `%c%s' doesn't allow an argument\n",
-					  argv[0], argv[optind - 1][0],
-					  pfound->name);
-#else
-			  fprintf (stderr, "\
-%s: option `%c%s' doesn't allow an argument\n",
-				   argv[0], argv[optind - 1][0], pfound->name);
-#endif
-			}
-
-#if defined _LIBC && defined USE_IN_LIBIO
-		      if (n >= 0)
-			{
-			  if (_IO_fwide (stderr, 0) > 0)
-			    __fwprintf (stderr, L"%s", buf);
-			  else
-			    fputs (buf, stderr);
-
-			  free (buf);
-			}
-#endif
-		    }
-
-		  nextchar += strlen (nextchar);
-
-		  optopt = pfound->val;
-		  return '?';
-		}
-	    }
-	  else if (pfound->has_arg == 1)
-	    {
-	      if (optind < argc)
-		optarg = argv[optind++];
-	      else
-		{
-		  if (print_errors)
-		    {
-#if defined _LIBC && defined USE_IN_LIBIO
-		      char *buf;
-
-		      if (__asprintf (&buf, "\
-%s: option `%s' requires an argument\n",
-				      argv[0], argv[optind - 1]) >= 0)
-			{
-			  if (_IO_fwide (stderr, 0) > 0)
-			    __fwprintf (stderr, L"%s", buf);
-			  else
-			    fputs (buf, stderr);
-
-			  free (buf);
-			}
-#else
-		      fprintf (stderr,
-			       "%s: option `%s' requires an argument\n",
-			       argv[0], argv[optind - 1]);
-#endif
-		    }
-		  nextchar += strlen (nextchar);
-		  optopt = pfound->val;
-		  return optstring[0] == ':' ? ':' : '?';
-		}
-	    }
-	  nextchar += strlen (nextchar);
-	  if (longind != NULL)
-	    *longind = option_index;
-	  if (pfound->flag)
-	    {
-	      *(pfound->flag) = pfound->val;
-	      return 0;
-	    }
-	  return pfound->val;
-	}
-
-      /* Can't find it as a long option.  If this is not getopt_long_only,
-	 or the option starts with '--' or is not a valid short
-	 option, then it's an error.
-	 Otherwise interpret it as a short option.  */
-      if (!long_only || argv[optind][1] == '-'
-	  || my_index (optstring, *nextchar) == NULL)
-	{
-	  if (print_errors)
-	    {
-#if defined _LIBC && defined USE_IN_LIBIO
-	      char *buf;
-	      int n;
-#endif
-
-	      if (argv[optind][1] == '-')
-		{
-		  /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
-		  n = __asprintf (&buf, "%s: unrecognized option `--%s'\n",
-				  argv[0], nextchar);
-#else
-		  fprintf (stderr, "%s: unrecognized option `--%s'\n",
-			   argv[0], nextchar);
-#endif
-		}
-	      else
-		{
-		  /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
-		  n = __asprintf (&buf, "%s: unrecognized option `%c%s'\n",
-				  argv[0], argv[optind][0], nextchar);
-#else
-		  fprintf (stderr, "%s: unrecognized option `%c%s'\n",
-			   argv[0], argv[optind][0], nextchar);
-#endif
-		}
-
-#if defined _LIBC && defined USE_IN_LIBIO
-	      if (n >= 0)
-		{
-		  if (_IO_fwide (stderr, 0) > 0)
-		    __fwprintf (stderr, L"%s", buf);
-		  else
-		    fputs (buf, stderr);
-
-		  free (buf);
-		}
-#endif
-	    }
-	  nextchar = (char *) "";
-	  optind++;
-	  optopt = 0;
-	  return '?';
-	}
-    }
-
-  /* Look at and handle the next short option-character.  */
-
-  {
-    char c = *nextchar++;
-    char *temp = my_index (optstring, c);
-
-    /* Increment `optind' when we start to process its last character.  */
-    if (*nextchar == '\0')
-      ++optind;
-
-    if (temp == NULL || c == ':')
-      {
-	if (print_errors)
-	  {
-#if defined _LIBC && defined USE_IN_LIBIO
-	      char *buf;
-	      int n;
-#endif
-
-	    if (posixly_correct)
-	      {
-		/* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-		n = __asprintf (&buf, "%s: illegal option -- %c\n",
-				argv[0], c);
-#else
-		fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
-#endif
-	      }
-	    else
-	      {
-#if defined _LIBC && defined USE_IN_LIBIO
-		n = __asprintf (&buf, "%s: invalid option -- %c\n",
-				argv[0], c);
-#else
-		fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c);
-#endif
-	      }
-
-#if defined _LIBC && defined USE_IN_LIBIO
-	    if (n >= 0)
-	      {
-		if (_IO_fwide (stderr, 0) > 0)
-		  __fwprintf (stderr, L"%s", buf);
-		else
-		  fputs (buf, stderr);
-
-		free (buf);
-	      }
-#endif
-	  }
-	optopt = c;
-	return '?';
-      }
-    /* Convenience. Treat POSIX -W foo same as long option --foo */
-    if (temp[0] == 'W' && temp[1] == ';')
-      {
-	char *nameend;
-	const struct option *p;
-	const struct option *pfound = NULL;
-	int exact = 0;
-	int ambig = 0;
-	int indfound = 0;
-	int option_index;
-
-	/* This is an option that requires an argument.  */
-	if (*nextchar != '\0')
-	  {
-	    optarg = nextchar;
-	    /* If we end this ARGV-element by taking the rest as an arg,
-	       we must advance to the next element now.  */
-	    optind++;
-	  }
-	else if (optind == argc)
-	  {
-	    if (print_errors)
-	      {
-		/* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-		char *buf;
-
-		if (__asprintf (&buf,
-				"%s: option requires an argument -- %c\n",
-				argv[0], c) >= 0)
-		  {
-		    if (_IO_fwide (stderr, 0) > 0)
-		      __fwprintf (stderr, L"%s", buf);
-		    else
-		      fputs (buf, stderr);
-
-		    free (buf);
-		  }
-#else
-		fprintf (stderr, "%s: option requires an argument -- %c\n",
-			 argv[0], c);
-#endif
-	      }
-	    optopt = c;
-	    if (optstring[0] == ':')
-	      c = ':';
-	    else
-	      c = '?';
-	    return c;
-	  }
-	else
-	  /* We already incremented `optind' once;
-	     increment it again when taking next ARGV-elt as argument.  */
-	  optarg = argv[optind++];
-
-	/* optarg is now the argument, see if it's in the
-	   table of longopts.  */
-
-	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
-	  /* Do nothing.  */ ;
-
-	/* Test all long options for either exact match
-	   or abbreviated matches.  */
-	for (p = longopts, option_index = 0; p->name; p++, option_index++)
-	  if (!strncmp (p->name, nextchar, nameend - nextchar))
-	    {
-	      if ((unsigned int) (nameend - nextchar) == strlen (p->name))
-		{
-		  /* Exact match found.  */
-		  pfound = p;
-		  indfound = option_index;
-		  exact = 1;
-		  break;
-		}
-	      else if (pfound == NULL)
-		{
-		  /* First nonexact match found.  */
-		  pfound = p;
-		  indfound = option_index;
-		}
-	      else
-		/* Second or later nonexact match found.  */
-		ambig = 1;
-	    }
-	if (ambig && !exact)
-	  {
-	    if (print_errors)
-	      {
-#if defined _LIBC && defined USE_IN_LIBIO
-		char *buf;
-
-		if (__asprintf (&buf, "%s: option `-W %s' is ambiguous\n",
-				argv[0], argv[optind]) >= 0)
-		  {
-		    if (_IO_fwide (stderr, 0) > 0)
-		      __fwprintf (stderr, L"%s", buf);
-		    else
-		      fputs (buf, stderr);
-
-		    free (buf);
-		  }
-#else
-		fprintf (stderr, "%s: option `-W %s' is ambiguous\n",
-			 argv[0], argv[optind]);
-#endif
-	      }
-	    nextchar += strlen (nextchar);
-	    optind++;
-	    return '?';
-	  }
-	if (pfound != NULL)
-	  {
-	    option_index = indfound;
-	    if (*nameend)
-	      {
-		/* Don't test has_arg with >, because some C compilers don't
-		   allow it to be used on enums.  */
-		if (pfound->has_arg)
-		  optarg = nameend + 1;
-		else
-		  {
-		    if (print_errors)
-		      {
-#if defined _LIBC && defined USE_IN_LIBIO
-			char *buf;
-
-			if (__asprintf (&buf, "\
-%s: option `-W %s' doesn't allow an argument\n",
-					argv[0], pfound->name) >= 0)
-			  {
-			    if (_IO_fwide (stderr, 0) > 0)
-			      __fwprintf (stderr, L"%s", buf);
-			    else
-			      fputs (buf, stderr);
-
-			    free (buf);
-			  }
-#else
-			fprintf (stderr, "\
-%s: option `-W %s' doesn't allow an argument\n",
-				 argv[0], pfound->name);
-#endif
-		      }
-
-		    nextchar += strlen (nextchar);
-		    return '?';
-		  }
-	      }
-	    else if (pfound->has_arg == 1)
-	      {
-		if (optind < argc)
-		  optarg = argv[optind++];
-		else
-		  {
-		    if (print_errors)
-		      {
-#if defined _LIBC && defined USE_IN_LIBIO
-			char *buf;
-
-			if (__asprintf (&buf, "\
-%s: option `%s' requires an argument\n",
-					argv[0], argv[optind - 1]) >= 0)
-			  {
-			    if (_IO_fwide (stderr, 0) > 0)
-			      __fwprintf (stderr, L"%s", buf);
-			    else
-			      fputs (buf, stderr);
-
-			    free (buf);
-			  }
-#else
-			fprintf (stderr,
-				 "%s: option `%s' requires an argument\n",
-				 argv[0], argv[optind - 1]);
-#endif
-		      }
-		    nextchar += strlen (nextchar);
-		    return optstring[0] == ':' ? ':' : '?';
-		  }
-	      }
-	    nextchar += strlen (nextchar);
-	    if (longind != NULL)
-	      *longind = option_index;
-	    if (pfound->flag)
-	      {
-		*(pfound->flag) = pfound->val;
-		return 0;
-	      }
-	    return pfound->val;
-	  }
-	  nextchar = NULL;
-	  return 'W';	/* Let the application handle it.   */
-      }
-    if (temp[1] == ':')
-      {
-	if (temp[2] == ':')
-	  {
-	    /* This is an option that accepts an argument optionally.  */
-	    if (*nextchar != '\0')
-	      {
-		optarg = nextchar;
-		optind++;
-	      }
-	    else
-	      optarg = NULL;
-	    nextchar = NULL;
-	  }
-	else
-	  {
-	    /* This is an option that requires an argument.  */
-	    if (*nextchar != '\0')
-	      {
-		optarg = nextchar;
-		/* If we end this ARGV-element by taking the rest as an arg,
-		   we must advance to the next element now.  */
-		optind++;
-	      }
-	    else if (optind == argc)
-	      {
-		if (print_errors)
-		  {
-		    /* 1003.2 specifies the format of this message.  */
-#if defined _LIBC && defined USE_IN_LIBIO
-		    char *buf;
-
-		    if (__asprintf (&buf, "\
-%s: option requires an argument -- %c\n",
-				    argv[0], c) >= 0)
-		      {
-			if (_IO_fwide (stderr, 0) > 0)
-			  __fwprintf (stderr, L"%s", buf);
-			else
-			  fputs (buf, stderr);
-
-			free (buf);
-		      }
-#else
-		    fprintf (stderr,
-			     "%s: option requires an argument -- %c\n",
-			     argv[0], c);
-#endif
-		  }
-		optopt = c;
-		if (optstring[0] == ':')
-		  c = ':';
-		else
-		  c = '?';
-	      }
-	    else
-	      /* We already incremented `optind' once;
-		 increment it again when taking next ARGV-elt as argument.  */
-	      optarg = argv[optind++];
-	    nextchar = NULL;
-	  }
-      }
-    return c;
-  }
-}
-
-int
-getopt (argc, argv, optstring)
-     int argc;
-     char *const *argv;
-     const char *optstring;
-{
-  return _getopt_internal (argc, argv, optstring,
-			   (const struct option *) 0,
-			   (int *) 0,
-			   0);
-}
-
-#endif	/* Not ELIDE_CODE.  */
-
-#ifdef TEST
-
-/* Compile with -DTEST to make an executable for use in testing
-   the above definition of `getopt'.  */
-
-int
-main (argc, argv)
-     int argc;
-     char **argv;
-{
-  int c;
-  int digit_optind = 0;
-
-  while (1)
-    {
-      int this_option_optind = optind ? optind : 1;
-
-      c = getopt (argc, argv, "abc:d:0123456789");
-      if (c == -1)
-	break;
-
-      switch (c)
-	{
-	case '0':
-	case '1':
-	case '2':
-	case '3':
-	case '4':
-	case '5':
-	case '6':
-	case '7':
-	case '8':
-	case '9':
-	  if (digit_optind != 0 && digit_optind != this_option_optind)
-	    printf ("digits occur in two different argv-elements.\n");
-	  digit_optind = this_option_optind;
-	  printf ("option %c\n", c);
-	  break;
-
-	case 'a':
-	  printf ("option a\n");
-	  break;
-
-	case 'b':
-	  printf ("option b\n");
-	  break;
-
-	case 'c':
-	  printf ("option c with value `%s'\n", optarg);
-	  break;
-
-	case '?':
-	  break;
-
-	default:
-	  printf ("?? getopt returned character code 0%o ??\n", c);
-	}
-    }
-
-  if (optind < argc)
-    {
-      printf ("non-option ARGV-elements: ");
-      while (optind < argc)
-	printf ("%s ", argv[optind++]);
-      printf ("\n");
-    }
-
-  exit (0);
-}
-
-#endif /* TEST */


=====================================
src/getopt.h deleted
=====================================
@@ -1,181 +0,0 @@
-/* Declarations for getopt.
-   Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifndef _GETOPT_H
-
-#ifndef __need_getopt
-# define _GETOPT_H 1
-#endif
-
-/* If __GNU_LIBRARY__ is not already defined, either we are being used
-   standalone, or this is the first header included in the source file.
-   If we are being used with glibc, we need to include <features.h>, but
-   that does not exist if we are standalone.  So: if __GNU_LIBRARY__ is
-   not defined, include <ctype.h>, which will pull in <features.h> for us
-   if it's from glibc.  (Why ctype.h?  It's guaranteed to exist and it
-   doesn't flood the namespace with stuff the way some other headers do.)  */
-#if !defined __GNU_LIBRARY__
-# include <ctype.h>
-#endif
-
-#ifdef	__cplusplus
-extern "C" {
-#endif
-
-/* For communication from `getopt' to the caller.
-   When `getopt' finds an option that takes an argument,
-   the argument value is returned here.
-   Also, when `ordering' is RETURN_IN_ORDER,
-   each non-option ARGV-element is returned here.  */
-
-extern char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
-   This is used for communication to and from the caller
-   and for communication between successive calls to `getopt'.
-
-   On entry to `getopt', zero means this is the first call; initialize.
-
-   When `getopt' returns -1, this is the index of the first of the
-   non-option elements that the caller should itself scan.
-
-   Otherwise, `optind' communicates from one call to the next
-   how much of ARGV has been scanned so far.  */
-
-extern int optind;
-
-/* Callers store zero here to inhibit the error message `getopt' prints
-   for unrecognized options.  */
-
-extern int opterr;
-
-/* Set to an option character which was unrecognized.  */
-
-extern int optopt;
-
-#ifndef __need_getopt
-/* Describe the long-named options requested by the application.
-   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
-   of `struct option' terminated by an element containing a name which is
-   zero.
-
-   The field `has_arg' is:
-   no_argument		(or 0) if the option does not take an argument,
-   required_argument	(or 1) if the option requires an argument,
-   optional_argument 	(or 2) if the option takes an optional argument.
-
-   If the field `flag' is not NULL, it points to a variable that is set
-   to the value given in the field `val' when the option is found, but
-   left unchanged if the option is not found.
-
-   To have a long-named option do something other than set an `int' to
-   a compiled-in constant, such as set a value from `optarg', set the
-   option's `flag' field to zero and its `val' field to a nonzero
-   value (the equivalent single-letter option character, if there is
-   one).  For long options that have a zero `flag' field, `getopt'
-   returns the contents of the `val' field.  */
-
-struct option
-{
-# if (defined __STDC__ && __STDC__) || defined __cplusplus
-  const char *name;
-# else
-  char *name;
-# endif
-  /* has_arg can't be an enum because some compilers complain about
-     type mismatches in all the code that assumes it is an int.  */
-  int has_arg;
-  int *flag;
-  int val;
-};
-
-/* Names for the values of the `has_arg' field of `struct option'.  */
-
-# define no_argument		0
-# define required_argument	1
-# define optional_argument	2
-#endif	/* need getopt */
-
-
-/* Get definitions and prototypes for functions to process the
-   arguments in ARGV (ARGC of them, minus the program name) for
-   options given in OPTS.
-
-   Return the option character from OPTS just read.  Return -1 when
-   there are no more options.  For unrecognized options, or options
-   missing arguments, `optopt' is set to the option letter, and '?' is
-   returned.
-
-   The OPTS string is a list of characters which are recognized option
-   letters, optionally followed by colons, specifying that that letter
-   takes an argument, to be placed in `optarg'.
-
-   If a letter in OPTS is followed by two colons, its argument is
-   optional.  This behavior is specific to the GNU `getopt'.
-
-   The argument `--' causes premature termination of argument
-   scanning, explicitly telling `getopt' that there are no more
-   options.
-
-   If OPTS begins with `--', then non-option arguments are treated as
-   arguments to the option '\0'.  This behavior is specific to the GNU
-   `getopt'.  */
-
-#if (defined __STDC__ && __STDC__) || defined __cplusplus
-# ifdef __GNU_LIBRARY__
-/* Many other libraries have conflicting prototypes for getopt, with
-   differences in the consts, in stdlib.h.  To avoid compilation
-   errors, only prototype getopt for the GNU C library.  */
-extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
-# else /* not __GNU_LIBRARY__ */
-extern int getopt ();
-# endif /* __GNU_LIBRARY__ */
-
-# ifndef __need_getopt
-extern int getopt_long (int ___argc, char *const *___argv,
-			const char *__shortopts,
-		        const struct option *__longopts, int *__longind);
-extern int getopt_long_only (int ___argc, char *const *___argv,
-			     const char *__shortopts,
-		             const struct option *__longopts, int *__longind);
-
-/* Internal only.  Users should not call this directly.  */
-extern int _getopt_internal (int ___argc, char *const *___argv,
-			     const char *__shortopts,
-		             const struct option *__longopts, int *__longind,
-			     int __long_only);
-# endif
-#else /* not __STDC__ */
-extern int getopt ();
-# ifndef __need_getopt
-extern int getopt_long ();
-extern int getopt_long_only ();
-
-extern int _getopt_internal ();
-# endif
-#endif /* __STDC__ */
-
-#ifdef	__cplusplus
-}
-#endif
-
-/* Make sure we later can get all the definitions and declarations.  */
-#undef __need_getopt
-
-#endif /* getopt.h */


=====================================
src/getopt1.c deleted
=====================================
@@ -1,196 +0,0 @@
-/* getopt_long and getopt_long_only entry points for GNU getopt.
-   Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
-     Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#ifdef _LIBC
-# include <getopt.h>
-#else
-# include "getopt.h"
-#endif
-
-#if !defined __STDC__ || !__STDC__
-/* This is a separate conditional since some stdc systems
-   reject `defined (const)'.  */
-#ifndef const
-#define const
-#endif
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
-   actually compiling the library itself.  This code is part of the GNU C
-   Library, but also included in many other GNU distributions.  Compiling
-   and linking in this code is a waste when using the GNU C library
-   (especially if it is a shared library).  Rather than having every GNU
-   program understand `configure --with-gnu-libc' and omit the object files,
-   it is simpler to just do this in the source for each such file.  */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-#include <gnu-versions.h>
-#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-#define ELIDE_CODE
-#endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
-   to get __GNU_LIBRARY__ defined.  */
-#ifdef __GNU_LIBRARY__
-#include <stdlib.h>
-#endif
-
-#ifndef	NULL
-#define NULL 0
-#endif
-
-int
-getopt_long (argc, argv, options, long_options, opt_index)
-     int argc;
-     char *const *argv;
-     const char *options;
-     const struct option *long_options;
-     int *opt_index;
-{
-  return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
-}
-
-/* Like getopt_long, but '-' as well as '--' can indicate a long option.
-   If an option that starts with '-' (not '--') doesn't match a long option,
-   but does match a short option, it is parsed as a short option
-   instead.  */
-
-int
-getopt_long_only (argc, argv, options, long_options, opt_index)
-     int argc;
-     char *const *argv;
-     const char *options;
-     const struct option *long_options;
-     int *opt_index;
-{
-  return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
-}
-
-# ifdef _LIBC
-libc_hidden_def (getopt_long)
-libc_hidden_def (getopt_long_only)
-# endif
-
-#endif	/* Not ELIDE_CODE.  */
-
-#ifdef TEST
-
-#include <stdio.h>
-
-int
-main (argc, argv)
-     int argc;
-     char **argv;
-{
-  int c;
-  int digit_optind = 0;
-
-  while (1)
-    {
-      int this_option_optind = optind ? optind : 1;
-      int option_index = 0;
-      static struct option long_options[] =
-      {
-	{"add", 1, 0, 0},
-	{"append", 0, 0, 0},
-	{"delete", 1, 0, 0},
-	{"verbose", 0, 0, 0},
-	{"create", 0, 0, 0},
-	{"file", 1, 0, 0},
-	{0, 0, 0, 0}
-      };
-
-      c = getopt_long (argc, argv, "abc:d:0123456789",
-		       long_options, &option_index);
-      if (c == -1)
-	break;
-
-      switch (c)
-	{
-	case 0:
-	  printf ("option %s", long_options[option_index].name);
-	  if (optarg)
-	    printf (" with arg %s", optarg);
-	  printf ("\n");
-	  break;
-
-	case '0':
-	case '1':
-	case '2':
-	case '3':
-	case '4':
-	case '5':
-	case '6':
-	case '7':
-	case '8':
-	case '9':
-	  if (digit_optind != 0 && digit_optind != this_option_optind)
-	    printf ("digits occur in two different argv-elements.\n");
-	  digit_optind = this_option_optind;
-	  printf ("option %c\n", c);
-	  break;
-
-	case 'a':
-	  printf ("option a\n");
-	  break;
-
-	case 'b':
-	  printf ("option b\n");
-	  break;
-
-	case 'c':
-	  printf ("option c with value `%s'\n", optarg);
-	  break;
-
-	case 'd':
-	  printf ("option d with value `%s'\n", optarg);
-	  break;
-
-	case '?':
-	  break;
-
-	default:
-	  printf ("?? getopt returned character code 0%o ??\n", c);
-	}
-    }
-
-  if (optind < argc)
-    {
-      printf ("non-option ARGV-elements: ");
-      while (optind < argc)
-	printf ("%s ", argv[optind++]);
-      printf ("\n");
-    }
-
-  exit (0);
-}
-
-#endif /* TEST */


=====================================
src/gmap.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gmap.c 222314 2020-04-05 17:18:05Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -324,7 +324,9 @@ static int min_matches;
   bool multiple_sequences_p = false;
 #endif
 
-static bool sharedp = true;
+static bool sharedp = false;
+static bool preload_shared_memory_p = false;
+static bool unload_shared_memory_p = false;
 static bool expand_offsets_p = false;
 
 #ifdef HAVE_MMAP
@@ -537,6 +539,9 @@ static struct option long_options[] = {
   
 
   /* Compute options */
+  {"use-shared-memory", required_argument, 0, 0}, /* sharedp */
+  {"preload-shared-memory", no_argument, 0, 0},	  /* preload_shared_memory_p */
+  {"unload-shared-memory", no_argument, 0, 0},	  /* unload_shared_memory_p */
 #ifdef HAVE_MMAP
   {"batch", required_argument, 0, 'B'}, /* offsetsstrm_access, positions_access, genome_access */
 #endif
@@ -5722,6 +5727,22 @@ parse_command_line (int argc, char *argv[], int optind) {
       } else if (!strcmp(long_name,"time")) {
 	timingp = true;
 
+      } else if (!strcmp(long_name,"use-shared-memory")) {
+	if (!strcmp(optarg,"1")) {
+	  sharedp = true;
+	} else if (!strcmp(optarg,"0")) {
+	  sharedp = false;
+	} else {
+	  fprintf(stderr,"--use-shared-memory flag must be 0 or 1\n");
+	  return 9;
+	}
+
+      } else if (!strcmp(long_name,"preload-shared-memory")) {
+	preload_shared_memory_p = true;
+
+      } else if (!strcmp(long_name,"unload-shared-memory")) {
+	unload_shared_memory_p = true;
+
       } else if (!strcmp(long_name,"expand-offsets")) {
 	fprintf(stderr,"Note: --expand-offsets flag is no longer supported.  With the latest algorithms, it doesn't improve speed much.  Ignoring this flag");
 
@@ -6186,6 +6207,10 @@ parse_command_line (int argc, char *argv[], int optind) {
 	printtype = SPLICESITES;
       } else if (!strcmp(optarg,"introns")) {
 	printtype = INTRONS;
+      } else if (!strcmp(optarg,"mask_introns")) {
+	printtype = MASK_INTRONS;
+      } else if (!strcmp(optarg,"mask_utr_introns")) {
+	printtype = MASK_UTR_INTRONS;
       } else if (!strcmp(optarg,"samse")) {
 	printtype = SAM;
 	sam_paired_p = false;
@@ -6216,6 +6241,8 @@ parse_command_line (int argc, char *argv[], int optind) {
 	fprintf(stderr,"  psl\n");
 	fprintf(stderr,"  splicesites (6)\n");
 	fprintf(stderr,"  introns\n");
+	fprintf(stderr,"  mask_introns\n");
+	fprintf(stderr,"  mask_utr_introns\n");
 	fprintf(stderr,"  samse\n");
 	fprintf(stderr,"  sampe\n");
 	fprintf(stderr,"  bedpe\n");
@@ -6824,13 +6851,13 @@ main (int argc, char *argv[]) {
 				     &alphabet,&alphabet_size,required_alphabet,
 				     required_index1part,required_index1interval,
 				     expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
-				     multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+				     multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
     indexdb_rev = Indexdb_new_genome(&index1part_aa,&index1interval,
 				     genomesubdir,fileroot,REV_FILESUFFIX,/*snps_root*/NULL,
 				     &alphabet,&alphabet_size,required_alphabet,
 				     required_index1part,required_index1interval,
 				     expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
-				     multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+				     multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
 
     if (indexdb_fwd == NULL || indexdb_rev == NULL) {
       fprintf(stderr,"Cannot find offsets file %s.%s*offsets or %s.%s*offsets.\n",
@@ -6889,7 +6916,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find metct index file.  Need to run cmetindex first\n");
 	exit(9);
       }
@@ -6898,7 +6925,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find metga index file.  Need to run cmetindex first\n");
 	exit(9);
       }
@@ -6914,7 +6941,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2iag index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -6923,7 +6950,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2itc index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -6939,7 +6966,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2itc index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -6948,7 +6975,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2iag index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -6959,7 +6986,7 @@ main (int argc, char *argv[]) {
 					    genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
 	exit(9);
       }
@@ -6970,7 +6997,7 @@ main (int argc, char *argv[]) {
 					genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
 					required_local1part,required_local1interval,
 					locoffsetsstrm_access,locpositions_access,
-					sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
+					sharedp,multiple_sequences_p,unload_shared_memory_p)) == NULL) {
 
 	/* Not using localdb currently, since Oligoindex_hr seems to be faster than Oligoindex_localdb */
 	fprintf(stderr,"Warning: Cannot find localdb files.  GMAP will run on this older index, but there is a newer index format available\n");
@@ -7036,7 +7063,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find metct index file.  Need to run cmetindex first\n");
 	exit(9);
       }
@@ -7044,7 +7071,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find metga index file.  Need to run cmetindex first\n");
 	exit(9);
       }
@@ -7060,7 +7087,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2iag index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -7068,7 +7095,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2itc index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -7084,7 +7111,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2itc index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -7092,7 +7119,7 @@ main (int argc, char *argv[]) {
 					    modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
 					    required_index1part,required_index1interval,
 					    expand_offsets_p,offsetsstrm_access,positions_access,
-					    sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+					    sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
 	fprintf(stderr,"Cannot find a2iag index file.  Need to run atoiindex first\n");
 	exit(9);
       }
@@ -7102,7 +7129,7 @@ main (int argc, char *argv[]) {
 				       snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
 				       required_index1part,required_index1interval,
 				       expand_offsets_p,offsetsstrm_access,positions_access,
-				       sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+				       sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
       if (indexdb_fwd == NULL) {
 	fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
 	exit(9);
@@ -7651,6 +7678,11 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
 ");
 #endif
 
+  fprintf(stdout,"\
+  --use-shared-memory=INT        If 1, then allocated memory is shared among all processes on this node\n\
+                                   If 0 (default), then each process has private allocated memory\n\
+");
+
     fprintf(stdout,"\
   --nosplicing                   Turns off splicing (useful for aligning genomic sequences\n\
                                    onto a genome)\n\
@@ -7793,6 +7825,8 @@ Output types\n\
     fprintf(stdout,"\
   -f, --format=INT               Other format for output (also note the -A and -S options\n\
                                    and other options listed under Output types):\n\
+                                   mask_introns,\n\
+                                   mask_utr_introns,\n\
                                    psl_pro (or 0) = PSL format in protein coords,\n\
                                    psl_nt (or 1) = PSL format in nucleotide coords,\n\
                                    gff3_gene (or 2) = GFF3 gene format,\n\
@@ -7806,6 +7840,8 @@ Output types\n\
     fprintf(stdout,"\
   -f, --format=INT               Other format for output (also note the -A and -S options\n\
                                    and other options listed under Output types):\n\
+                                   mask_introns,\n\
+                                   mask_utr_introns,\n\
                                    psl (or 1) = PSL (BLAT) format,\n\
                                    gff3_gene (or 2) = GFF3 gene format,\n\
                                    gff3_match_cdna (or 3) = GFF3 cDNA_match format,\n\


=====================================
src/gmapindex.c
=====================================
@@ -1,10 +1,12 @@
-static char rcsid[] = "$Id: gmapindex.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gmapindex.c 222388 2020-04-09 05:36:24Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
 
 #include <stdio.h>
 #include <stdlib.h>
+#include <libgen.h>		/* For basename */
+
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
@@ -91,10 +93,9 @@ typedef Tableuint_T Table_chrpos_T;
 
 
 /* Program variables */
-typedef enum {NONE, AUXFILES, GENOME, CONCATENATE_GENOMES, UNSHUFFLE, COUNT,
+typedef enum {NONE, AUXFILES, GENOME, COMPRESS_GENOMES, CONCATENATE_GENOMES, UNSHUFFLE, COUNT,
 	      OFFSETS, POSITIONS, REGIONDB, CONCATENATE_REGIONDBS} Action_T;
 static Action_T action = NONE;
-static char *sourcedirs = ".";
 static char *destdir = ".";
 static char *fileroot = NULL;
 static int compression_types = BITPACK64_COMPRESSION;
@@ -1338,7 +1339,7 @@ main (int argc, char *argv[]) {
   char *typestring;
   Univcoord_T *genomelengths, genomelength, totalnts;
   char *chromosomefile, *iitfile, *positionsfile_high, *positionsfile_low, interval_char, region_interval_char;
-  char *sourcedirs_copy, *sourcedir, **inputfiles, *outputfile;
+  char *source_genome, *source_dbroot, **inputfiles, *outputfile;
   int nfiles;
 
   Indexdb_filenames_T ifilenames;
@@ -1359,9 +1360,8 @@ main (int argc, char *argv[]) {
   extern char *optarg;
   char *string;
 
-  while ((c = getopt(argc,argv,"F:D:d:z:k:q:j:ArlGCUNHOPQRWw:e:Ss:n:m9")) != -1) {
+  while ((c = getopt(argc,argv,"D:d:z:k:q:j:ArlGZCUNHOPQRWw:e:Ss:n:m9")) != -1) {
     switch (c) {
-    case 'F': sourcedirs = optarg; break;
     case 'D': destdir = optarg; break;
     case 'd': fileroot = optarg; break;
 
@@ -1395,6 +1395,7 @@ main (int argc, char *argv[]) {
     case 'r': rawp = true; break;
     case 'l': genome_lc_p = true; break;
     case 'G': action = GENOME; break;
+    case 'Z': action = COMPRESS_GENOMES; break;
     case 'C': action = CONCATENATE_GENOMES; break;
     case 'U': action = UNSHUFFLE; break;
     case 'N': action = COUNT; break;
@@ -1462,7 +1463,7 @@ main (int argc, char *argv[]) {
     exit(9);
   }
 
-  if (action != UNSHUFFLE) {
+  if (action != COMPRESS_GENOMES && action != UNSHUFFLE) {
     if (fileroot == NULL) {
       fprintf(stderr,"Missing name of genome database.  Must specify with -d flag.\n");
       exit(9);
@@ -1644,9 +1645,17 @@ main (int argc, char *argv[]) {
     Univ_IIT_free(&chromosome_iit);
     Univ_IIT_free(&contig_iit);
 
+  } else if (action == COMPRESS_GENOMES) {
+
+    if (argc == 0) {
+      Compress_compress(/*inputfiles*/NULL,/*nfiles*/0,/*stdin_p*/true);
+    } else {
+      Compress_compress(/*inputfiles*/argv,/*nfiles*/argc,/*stdin_p*/false);
+    }
+
   } else if (action == CONCATENATE_GENOMES) {
-    /* Usage: gmapindex [-F <sourcedirs>] [-D <destdir>] -d <dbname> -C <input_genomes...>
-       Requires <sourcedir>/<input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
+    /* Usage: gmapindex [-D <destdir>] -d <dbname> -C </path/to/input_genomes...>
+       Requires <input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
        Creates <destdir>/<dbname>.genomecomp (horizontal format in blocks of 32).
        Then the unshuffle command creates another version (vertical format in blocks of 128).  */
 
@@ -1654,39 +1663,31 @@ main (int argc, char *argv[]) {
     inputfiles = (char **) MALLOC(nfiles*sizeof(char *));
     genomelengths = (Univcoord_T *) MALLOC(nfiles*sizeof(Univcoord_T));
 
-    sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(sourcedirs));
     for (i = 0; i < nfiles; i++) {
-      strcpy(sourcedirs_copy,sourcedirs);
-
-      if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
-	chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-					 strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
-	sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
-      }
-      while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
-	FREE(chromosomefile);
-	if ((sourcedir = strtok(NULL,",")) != NULL) {
-	  chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-					   strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
-	  sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
-	}
-      }
-      if (sourcedir == NULL) {
-	fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[i]);
+      source_genome = MALLOC((strlen(argv[i])+1)*sizeof(char));
+      strcpy(source_genome,argv[i]);
+      source_dbroot = basename(source_genome);
+
+      chromosomefile = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+				       strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+      sprintf(chromosomefile,"%s/%s.chromosome.iit",argv[i],source_dbroot);
+      if ((chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+	fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[i]);
+	fprintf(stderr,"Expecting to find %s\n",chromosomefile);
 	exit(9);
       } else {
 	FREE(chromosomefile);
       }
 
-      inputfiles[i] = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-				      strlen(argv[i])+strlen(".genomecomp")+1,sizeof(char));
-      sprintf(inputfiles[i],"%s/%s/%s.genomecomp",sourcedir,argv[i],argv[i]);
+      inputfiles[i] = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+				      strlen(source_dbroot)+strlen(".genomecomp")+1,sizeof(char));
+      sprintf(inputfiles[i],"%s/%s.genomecomp",argv[i],source_dbroot);
 
       genomelengths[i] = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
       fprintf(stderr,"genomelength of %s is %llu bp\n",argv[i],genomelengths[i]);
       Univ_IIT_free(&chromosome_iit);
+      FREE(source_genome);
     }
-    FREE(sourcedirs_copy);
 
     outputfile = (char *) CALLOC(strlen(destdir)+strlen("/")+
 				 strlen(fileroot)+strlen(".genomecomp")+1,sizeof(char));
@@ -1897,8 +1898,8 @@ main (int argc, char *argv[]) {
     Univ_IIT_free(&chromosome_iit);
 
   } else if (action == CONCATENATE_REGIONDBS) {
-    /* Usage: gmapindex [-F <sourcedirs>] [-D <destdir>] -d <dbname> -R <input_genomes...>
-       Requires <sourcedir>/<input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
+    /* Usage: gmapindex [-D <destdir>] -d <dbname> -R </path/to/input_genomes...>
+       Requires <input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
        Creates <destdir>/<dbname>.genomecomp (horizontal format in blocks of 32).
        Then the unshuffle command creates another version (vertical format in blocks of 128).  */
 
@@ -1906,38 +1907,29 @@ main (int argc, char *argv[]) {
     inputfiles = (char **) MALLOC(nfiles*sizeof(char *));
     genomelengths = (Univcoord_T *) MALLOC(nfiles*sizeof(Univcoord_T));
 
-    sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(sourcedirs));
     for (i = 0; i < nfiles; i++) {
-      strcpy(sourcedirs_copy,sourcedirs);
-
-      if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
-	chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-					 strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
-	sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
-      }
-      while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
-	FREE(chromosomefile);
-	if ((sourcedir = strtok(NULL,",")) != NULL) {
-	  chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-					   strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
-	  sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
-	}
-      }
-      if (sourcedir == NULL) {
-	fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[i]);
+      source_genome = MALLOC((strlen(argv[i])+1)*sizeof(char));
+      strcpy(source_genome,argv[i]);
+      source_dbroot = basename(source_genome);
+
+      chromosomefile = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+				       strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+      sprintf(chromosomefile,"%s/%s.chromosome.iit",argv[i],source_dbroot);
+      if ((chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+	fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[i]);
+	fprintf(stderr,"Expecting to find %s\n",chromosomefile);
 	exit(9);
       } else {
 	FREE(chromosomefile);
       }
 
-      genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[i])+1)*sizeof(char));
-      sprintf(genomesubdir,"%s/%s",sourcedir,argv[i]);
-
       if ((rfilenames = Regiondb_get_filenames(&region1part,&region1interval,
-					       genomesubdir,argv[i],IDX_FILESUFFIX,/*snps_root*/NULL,
+					       /*genomesubdir*/argv[i],/*fileroot*/source_dbroot,
+					       IDX_FILESUFFIX,/*snps_root*/NULL,
 					       required_region1part,required_region1interval)) == NULL) {
 	fprintf(stderr,"Could not find a matching regiondb file for %s\n",argv[i]);
 	exit(9);
+
       } else {
 	if (required_region1part == 0) {
 	  required_region1part = region1part;
@@ -1946,20 +1938,20 @@ main (int argc, char *argv[]) {
 	  required_region1interval = region1interval;
 	}
 
-	inputfiles[i] = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
-					strlen(argv[i])+strlen(".refXXXregiondb")+1,sizeof(char));
-	sprintf(inputfiles[i],"%s/%s/%s.ref%02d%01dregiondb",
-		sourcedir,argv[i],argv[i],required_region1part,required_region1interval);
+	inputfiles[i] = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+					strlen(source_dbroot)+strlen(".refXXXregiondb")+1,sizeof(char));
+	sprintf(inputfiles[i],"%s/%s.ref%02d%01dregiondb",
+		argv[i],source_dbroot,required_region1part,required_region1interval);
 	
 	genomelengths[i] = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
 	fprintf(stderr,"genomelength of %s is %llu bp\n",argv[i],genomelengths[i]);
 	Univ_IIT_free(&chromosome_iit);
 	
 	Regiondb_filenames_free(&rfilenames);
-	FREE(genomesubdir);
       }
+
+      FREE(source_genome);
     }
-    FREE(sourcedirs_copy);
 
     outputfile = (char *) CALLOC(strlen(destdir)+strlen("/")+
 				 strlen(fileroot)+strlen(".refXXXregiondb")+1,sizeof(char));


=====================================
src/gsnap.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gsnap.c 222315 2020-04-05 17:18:33Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -281,7 +281,7 @@ static int trim_indel_score = -2; /* was -4 */
 
 
 static bool multiple_sequences_p;
-static bool sharedp = true;
+static bool sharedp = false;
 static bool preload_shared_memory_p = false;
 static bool unload_shared_memory_p = false;
 static bool expand_offsets_p = false;
@@ -3995,11 +3995,7 @@ main (int argc, char *argv[]) {
 #endif
 
   if (preload_shared_memory_p == true || unload_shared_memory_p == true) {
-#if 0
-    /* To avoid time-consuming pre-load */
-    sarray_access = USE_MMAP_ONLY;
-    lcp_access = USE_MMAP_ONLY;
-#endif
+    /* No need for pre-load */
 
   } else if (multiple_sequences_p == true) {
 #if 0
@@ -4402,8 +4398,8 @@ is still designed to be fast.\n\
 ");
 #endif
   fprintf(stdout,"\
-  --use-shared-memory=INT        If 1 (default), then allocated memory is shared among all processes\n\
-                                   on this node.  If 0, then each process has private allocated memory\n\
+  --use-shared-memory=INT        If 1, then allocated memory is shared among all processes on this node\n\
+                                   If 0 (default), then each process has private allocated memory\n\
   --preload-shared-memory        Load files indicated by --batch mode into shared memory for use by other\n\
                                    GMAP/GSNAP processes on this node, and then exit.  Ignore any input files.\n\
   --unload-shared-memory         Unload files indicated by --batch mode into shared memory, or allow them\n\


=====================================
src/iit-read-univ.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read-univ.c 218400 2019-02-19 00:55:54Z twu $";
+static char rcsid[] = "$Id: iit-read-univ.c 222390 2020-04-10 12:44:01Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -1440,78 +1440,49 @@ read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
 /* This function only assigns pointers.  Subsequent accesses to
    memory, other than char *, still need to be read correctly
    by bigendian machines */
+/* Previously allowed read/write access, but we can assume read-only access */
 #ifdef HAVE_MMAP
 static bool
 mmap_annotations (char *filename, T new, bool readonlyp) {
   int remainder;
 
-  if (readonlyp == true) {
-    if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
+  assert(readonlyp == true);
+
+  if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
       fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
       exit(9);
-    }
+  }
 
-    new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+  new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
 						       /*randomp*/true);
-    debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
-    new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
-    new->labelorder_length += (size_t) remainder;
+  debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
+  new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
+  new->labelorder_length += (size_t) remainder;
 
-    new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
+  new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
 							  /*randomp*/true);
-    debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
-    new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-    new->labelpointers_length += (size_t) remainder;
+  debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
+  new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
+  new->labelpointers_length += (size_t) remainder;
 
-    new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+  new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
 						  /*randomp*/true);
-    debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
-    new->labels = (char *) &(new->label_mmap[remainder]);
-    new->label_length += (size_t) remainder;
-
-    new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
-							  /*randomp*/true);
-    debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
-    new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-    new->annotpointers_length += (size_t) remainder;
+  debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
+  new->labels = (char *) &(new->label_mmap[remainder]);
+  new->label_length += (size_t) remainder;
+
+  new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
+							/*randomp*/true);
+  debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
+  new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
+  new->annotpointers_length += (size_t) remainder;
+  
+  new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
+						/*randomp*/true);
+  debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
+  new->annotations = (char *) &(new->annot_mmap[remainder]);
+  new->annot_length += (size_t) remainder;
 
-    new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
-						  /*randomp*/true);
-    debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
-    new->annotations = (char *) &(new->annot_mmap[remainder]);
-    new->annot_length += (size_t) remainder;
-
-  } else {
-    if ((new->fd = open(filename,O_RDWR,0764)) < 0) {
-      fprintf(stderr,"Error: can't open file %s with open for reading/writing\n",filename);
-      exit(9);
-    }
-
-    new->labelorder_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
-							  /*randomp*/true);
-    new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
-    new->labelorder_length += (size_t) remainder;
-
-    new->labelpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
-							     /*randomp*/true);
-    new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-    new->labelpointers_length += (size_t) remainder;
-
-    new->label_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->label_offset,new->label_length,
-						     /*randomp*/true);
-    new->labels = (char *) &(new->label_mmap[remainder]);
-    new->label_length += (size_t) remainder;
-
-    new->annotpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
-							     /*randomp*/true);
-    new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-    new->annotpointers_length += (size_t) remainder;
-
-    new->annot_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annot_offset,new->annot_length,
-						     /*randomp*/true);
-    new->annotations = (char *) &(new->annot_mmap[remainder]);
-    new->annot_length += (size_t) remainder;
-  }
 
   if (new->labelorder == NULL || new->labelpointers == NULL || new->labels == NULL) {
     fprintf(stderr,"Memory mapping failed in reading IIT file %s.  Using slow file IO instead.\n",filename);


=====================================
src/iit-read.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: iit-read.c 222390 2020-04-10 12:44:01Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -2350,132 +2350,83 @@ read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
 /* This function only assigns pointers.  Subsequent accesses to
    memory, other than char *, still need to be read correctly
    by bigendian machines */
+/* Previously allowed read/write access, but we can assume read-only access */
 #ifdef HAVE_MMAP
 static bool
 mmap_annotations (char *filename, T new, bool readonlyp) {
   int remainder;
 
-  if (readonlyp == true) {
-    if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
-      fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
-      exit(9);
-    }
+  assert(readonlyp == true);
 
-    if (new->valuep == true) {
-      new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
-							 /*randomp*/true);
-      debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
-      new->valueorder = (int *) &(new->valueorder_mmap[remainder]);
-      new->valueorder_length += (size_t) remainder;
-      
-      new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
-						    /*randomp*/true);
-      debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
-      new->values = (double *) &(new->value_mmap[remainder]);
-      new->value_length += (size_t) remainder;
-    }
+  if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
+    fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
+    exit(9);
+  }
 
-    new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+  if (new->valuep == true) {
+    new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
 						       /*randomp*/true);
-    debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
-    new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
-    new->labelorder_length += (size_t) remainder;
-
-    new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
-							  /*randomp*/true);
-    debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
-#ifdef HAVE_64_BIT
-    if (new->label_pointers_8p == true) {
-      new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
-      new->labelpointers = (UINT4 *) NULL;
-    } else {
-      new->labelpointers8 = (UINT8 *) NULL;
-      new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-    }
-#else
-    new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-#endif
-    new->labelpointers_length += (size_t) remainder;
-
-    new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+    debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
+    new->valueorder = (int *) &(new->valueorder_mmap[remainder]);
+    new->valueorder_length += (size_t) remainder;
+    
+    new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
 						  /*randomp*/true);
-    debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
-    new->labels = (char *) &(new->label_mmap[remainder]);
-    new->label_length += (size_t) remainder;
-
-    new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
-							  /*randomp*/true);
-    debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
+    debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
+    new->values = (double *) &(new->value_mmap[remainder]);
+    new->value_length += (size_t) remainder;
+  }
+  
+  new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+						     /*randomp*/true);
+  debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
+  new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
+  new->labelorder_length += (size_t) remainder;
+  
+  new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
+							/*randomp*/true);
+  debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
 #ifdef HAVE_64_BIT
-    if (new->annot_pointers_8p == true) {
-      new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
-      new->annotpointers = (UINT4 *) NULL;
-    } else {
-      new->annotpointers8 = (UINT8 *) NULL;
-      new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-    }
-#else
-    new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-#endif
-    new->annotpointers_length += (size_t) remainder;
-
-    new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
-						  /*randomp*/true);
-    debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
-    new->annotations = (char *) &(new->annot_mmap[remainder]);
-    new->annot_length += (size_t) remainder;
-
+  if (new->label_pointers_8p == true) {
+    new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
+    new->labelpointers = (UINT4 *) NULL;
   } else {
-    if ((new->fd = open(filename,O_RDWR,0764)) < 0) {
-      fprintf(stderr,"Error: can't open file %s with open for reading/writing\n",filename);
-      exit(9);
-    }
-
-    new->labelorder_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
-							  /*randomp*/true);
-    new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
-    new->labelorder_length += (size_t) remainder;
-
-    new->labelpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
-							     /*randomp*/true);
-#ifdef HAVE_64_BIT
-    if (new->label_pointers_8p == true) {
-      new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
-      new->labelpointers = (UINT4 *) NULL;
-    } else {
-      new->labelpointers8 = (UINT8 *) NULL;
-      new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-    }
-#else
+    new->labelpointers8 = (UINT8 *) NULL;
     new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
+  }
+#else
+  new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
 #endif
-    new->labelpointers_length += (size_t) remainder;
-
-    new->label_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->label_offset,new->label_length,
-						     /*randomp*/true);
-    new->labels = (char *) &(new->label_mmap[remainder]);
-    new->label_length += (size_t) remainder;
-
-    new->annotpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
-							     /*randomp*/true);
+  new->labelpointers_length += (size_t) remainder;
+  
+  new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+						/*randomp*/true);
+  debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
+  new->labels = (char *) &(new->label_mmap[remainder]);
+  new->label_length += (size_t) remainder;
+  
+  new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
+							/*randomp*/true);
+  debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
 #ifdef HAVE_64_BIT
-    if (new->annot_pointers_8p == true) {
-      new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
-      new->annotpointers = (UINT4 *) NULL;
-    } else {
-      new->annotpointers8 = (UINT8 *) NULL;
-      new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-    }
-#else
+  if (new->annot_pointers_8p == true) {
+    new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
+    new->annotpointers = (UINT4 *) NULL;
+  } else {
+    new->annotpointers8 = (UINT8 *) NULL;
     new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
+  }
+#else
+  new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
 #endif
-    new->annotpointers_length += (size_t) remainder;
+  new->annotpointers_length += (size_t) remainder;
+  
+  new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
+						/*randomp*/true);
+  debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
+  new->annotations = (char *) &(new->annot_mmap[remainder]);
+  new->annot_length += (size_t) remainder;
 
-    new->annot_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annot_offset,new->annot_length,
-						     /*randomp*/true);
-    new->annotations = (char *) &(new->annot_mmap[remainder]);
-    new->annot_length += (size_t) remainder;
-  }
 
 #ifdef HAVE_64_BIT
   if (new->label_pointers_8p == true) {


=====================================
src/iit_get.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_get.c 221574 2020-01-30 21:17:50Z twu $";
+static char rcsid[] = "$Id: iit_get.c 222203 2020-03-23 21:59:48Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -539,7 +539,8 @@ print_interval (Chrpos_T *lastcoord, long int total,
 		char *divstring, Chrpos_T coordstart, Chrpos_T coordend, 
 		int index, IIT_T iit, int ndivs, int fieldint) {
   Interval_T interval;
-  char *label, *annotation, *restofheader;
+  char *label, *annotation, *restofheader, *p, *q;
+  Chrpos_T pos;
   bool allocp;
 
   debug(printf("Entered print_interval with coordstart %u and coordend %u\n",coordstart,coordend));
@@ -595,8 +596,30 @@ print_interval (Chrpos_T *lastcoord, long int total,
     printf("%s\n",restofheader);
     if (coordstart == 0 && coordend == 0) {
       printf("%s",annotation);
+
     } else {
-      printf("%.*s\n",coordend-coordstart+1,&(annotation[coordstart-1]));
+      /* printf("%.*s\n",coordend-coordstart+1,&(annotation[coordstart-1])); */
+
+      p = annotation;
+      pos = 1;
+      while (pos < coordstart) {
+	if (*p++ != '\n') {
+	  pos++;
+	}
+      }
+
+      while (*p == '\n') {
+	p++;
+      }
+      q = p;
+      
+      while (pos < coordend) {
+	if (*++q != '\n') {
+	  pos++;
+	}
+      }
+
+      printf("%.*s\n",(int) (q-p+1),p);
     }
     if (allocp == true) {
       FREE(restofheader);


=====================================
src/iit_store.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_store.c 221945 2020-02-21 03:16:45Z twu $";
+static char rcsid[] = "$Id: iit_store.c 222272 2020-03-27 14:29:22Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -60,6 +60,7 @@ static bool univ_format_p = false; /* IIT_write_univ used for chromosome.iit fil
 static bool gff3_format_p = false;
 static char *labelid = "ID";
 static bool fieldsp = false;
+static bool acc_only_p = false;
 static char iit_version = 0;
 
 static Sorttype_T divsort = CHROM_SORT;
@@ -70,6 +71,7 @@ static struct option long_options[] = {
   /* Input options */
   {"output", required_argument, 0, 'o'}, /* outputfile */
   {"univformat", no_argument, 0, '1'}, /* univ_format_p */
+  {"accession-only", no_argument, 0, 0},     /* acc_only_p */
   {"fields", no_argument, 0, 'F'}, /* fieldsp */
   {"gff", no_argument, 0, 'G'}, /* gff3_format_p */
   {"label", required_argument, 0, 'l'}, /* labelid */
@@ -107,6 +109,7 @@ Options\n\
   -o, --output=STRING       Name of output iit file\n\
   -1, --oldformat           Old format for intervals:\n\
                              <start> <optional end> <optional div> <optional type>\n\
+  --accession-only          Process only the first word of each FASTA header, and ignore the rest of the line\n\
   -F, --fields              Annotation consists of separate fields\n\
   -G, --gff                 Parse input file in gff3 format\n\
   -l, --label=STRING        For gff input, the feature attribute to use (default is ID)\n\
@@ -141,7 +144,9 @@ and a given type, numeric value, or both is optional.  A numeric value\n\
 allows intervals to be searched by a range of values using iit_get.\n\
 If the interval is omitted, then it is assumed to be label:1..n,\n\
 where n is the length of the sequence.  This allows for storage and retrieval\n\
-of sequences in FASTA files.\n\
+of sequences in FASTA files.  If you specify --accession-only, then it is\n\
+assumed that you are not providing intervals, and all information in the FASTA\n\
+header other than the first word (accession) will be ignored.\n\
 \n\
 Intervals may have directions.  To indicate a forward direction,\n\
 the start coordinate should be less than the end coordinate.\n\
@@ -297,7 +302,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
     strcpy(*label,acc);
   }
 
-  if (sscanf(header,">%s %s",acc,query) < 2) {
+  if (acc_only_p == true || sscanf(header,">%s %s",acc,query) < 2) {
     /* Treat query as acc:1..n */
     divstring = (char *) MALLOC((*labellength+1)*sizeof(char));
     strcpy(divstring,acc);
@@ -316,7 +321,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
     coords = (char *) NULL;
 
   } else if (!index(query,':')) {
-    debug(printf("Query %s has no div\n",query));
+    debug(printf("Query %s has no div.\n",query));
     divstring = (char *) CALLOC(1,sizeof(char));
     divstring[0] = '\0';
     coords = query;
@@ -357,7 +362,8 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
     debug(printf("  and coords %s as a range starting at %llu and ending at %llu\n",
 		 coords,(unsigned long long) *start,(unsigned long long) *end));
   } else {
-    fprintf(stderr,"Error parsing %s:%s.  Expecting coords (as <div>:<number>..<number>)\n",query,coords);
+    fprintf(stderr,"Error parsing %s:%s.  Expecting coords (as <div>:<number>..<number>).  Or specify --accession-only to ignore the second field\n",
+	    query,coords);
     fprintf(stderr,"Problematic line was: %s\n",header);
     exit(9);
   }
@@ -371,7 +377,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
     *value = atof(valueptr);
   }
 
-  if (sscanf(header,">%s %s %s",acc,query,tag) < 3) {
+  if (acc_only_p == true || sscanf(header,">%s %s %s",acc,query,tag) < 3) {
     *type = 0;
     *restofheader = (char *) NULL;
 
@@ -946,10 +952,23 @@ main (int argc, char *argv[]) {
   extern int optind;
   extern char *optarg;
   int long_option_index = 0;
+  const char *long_name;
 
   while ((opt = getopt_long(argc,argv,"o:1FGl:v:s:",
 			    long_options,&long_option_index)) != -1) {
     switch (opt) {
+
+    case 0:
+      long_name = long_options[long_option_index].name;
+      if (!strcmp(long_name,"accession-only")) {
+	acc_only_p = true;
+      } else {
+	/* Shouldn't reach here */
+	fprintf(stderr,"Don't recognize option %s.  For usage, run 'iit_store --help'",long_name);
+	return 9;
+      }
+      break;
+
     case 'o': outputfile = optarg; break;
     case '1': univ_format_p = true; break;
     case 'F': fieldsp = true; break;


=====================================
src/indexdb-cat.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
+static char rcsid[] = "$Id: indexdb-cat.c 222388 2020-04-09 05:36:24Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -19,6 +19,7 @@ static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>		/* For memset */
+#include <libgen.h>		/* For basename */
 
 #include "mem.h"
 #include "fopen.h"
@@ -59,9 +60,7 @@ static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
 
 
 
-static char *user_sourcedirs = NULL;
 static char *user_destdir = NULL;
-static char *sourcedirs = ".";
 static char *destdir = ".";
 static char *fileroot = NULL;
 static int compression_type;
@@ -76,7 +75,6 @@ static char *snps_root = NULL;
 
 static struct option long_options[] = {
   /* Input options */
-  {"sourcedirs", required_argument, 0, 'F'},	/* user_sourcedirs */
   {"destdir", required_argument, 0, 'D'},	/* user_destdir */
   {"kmer", required_argument, 0, 'k'}, /* required_index1part */
   {"sampling", required_argument, 0, 'q'}, /* required_index1interval */
@@ -93,7 +91,7 @@ static struct option long_options[] = {
 static void
 print_program_version () {
   fprintf(stdout,"\n");
-  fprintf(stdout,"CMETINDEX: Builds GMAP index files for methylated genomes\n");
+  fprintf(stdout,"INDEXDB_CAT: Concatenates GMAP index files\n");
   fprintf(stdout,"Part of GMAP package, version %s\n",PACKAGE_VERSION);
   fprintf(stdout,"Default gmap directory: %s\n",GMAPDB);
   fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
@@ -626,7 +624,7 @@ merge_indexdbs_huge_uint8 (char *new_pages_filename, char *new_pointers_filename
 int
 main (int argc, char *argv[]) {
   Indexdb_filenames_T ifilenames;
-  char *sourcedirs_copy, *sourcedir, *filename, *genomesubdir;
+  char *source_genome, *source_dbroot, *filename, *genomesubdir;
   char *new_pages_filename = NULL, *new_pointers_filename = NULL, *new_offsets_filename = NULL,
     *new_positions_high_filename = NULL, *new_positions_filename = NULL;
   char interval_char;
@@ -665,7 +663,6 @@ main (int argc, char *argv[]) {
       }
       break;
 
-    case 'F': user_sourcedirs = optarg; break;
     case 'D': user_destdir = optarg; break;
     case 'd': fileroot = optarg; break;
     case 'k': required_index1part = atoi(optarg); break;
@@ -688,51 +685,35 @@ main (int argc, char *argv[]) {
   }
 
   destdir = Datadir_find_genomedir(user_destdir);
-  if (user_sourcedirs == NULL) {
-    sourcedirs = destdir;
-  } else {
-    sourcedirs = user_sourcedirs;
-  }
-
   ngenomes = argc;
   fprintf(stderr,"Concatenating %d genomes\n",ngenomes);
 
-
   /* Chromosome IIT files.  Need to determine coord_values_8p and genomelength */
   genome_8p = (bool *) MALLOC(ngenomes*sizeof(bool));
   genome_univcoord = (Univcoord_T *) MALLOC(ngenomes*sizeof(Univcoord_T));
   total_genomelength = 0;
 
-  sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(char));
   for (genomei = 0; genomei < ngenomes; genomei++) {
     genome_univcoord[genomei] = total_genomelength;
-
-    strcpy(sourcedirs_copy,sourcedirs);
-    if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
-      filename = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[genomei])+strlen("/")+
-				 strlen(argv[genomei])+strlen(".chromosome.iit")+1,sizeof(char));
-      sprintf(filename,"%s/%s/%s.chromosome.iit",sourcedir,argv[genomei],argv[genomei]);
-    }
-    while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(filename,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
-      FREE(filename);
-      if ((sourcedir = strtok(NULL,",")) != NULL) {
-	filename = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[genomei])+strlen("/")+
-				   strlen(argv[genomei])+strlen(".chromosome.iit")+1,sizeof(char));
-	sprintf(filename,"%s/%s/%s.chromosome.iit",sourcedir,argv[genomei],argv[genomei]);
-      }
-    }
-
-    if (sourcedir == NULL) {
-      fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[genomei]);
+    source_genome = MALLOC((strlen(argv[genomei])+1)*sizeof(char));
+    strcpy(source_genome,argv[genomei]);
+    source_dbroot = basename(source_genome);
+
+    filename = (char *) CALLOC(strlen(argv[genomei])+strlen("/")+
+			       strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+    sprintf(filename,"%s/%s.chromosome.iit",argv[genomei],source_dbroot);
+    if ((chromosome_iit = Univ_IIT_read(filename,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+      fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[genomei]);
+      fprintf(stderr,"Expecting to find %s\n",filename);
       exit(9);
     } else {
       FREE(filename);
+      FREE(source_genome);
       genome_8p[genomei] = Univ_IIT_coord_values_8p(chromosome_iit);
       total_genomelength += Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
       Univ_IIT_free(&chromosome_iit);
     }
   }
-  FREE(sourcedirs_copy);
 
 #ifdef HAVE_64_BIT
   if (total_genomelength > 4294967295) {
@@ -746,28 +727,18 @@ main (int argc, char *argv[]) {
 
   indexdbs = (Indexdb_T *) MALLOC(ngenomes*sizeof(Indexdb_T));
 
-  sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(char));
   for (genomei = 0; genomei < ngenomes; genomei++) {
     fprintf(stderr,"Reading genome %s\n",argv[genomei]);
-
-    strcpy(sourcedirs_copy,sourcedirs);
-    if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
-      genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[genomei])+1)*sizeof(char));
-      sprintf(genomesubdir,"%s/%s",sourcedir,argv[genomei]);
-    }
-    while (sourcedir != NULL && (ifilenames = Indexdb_get_filenames(&compression_type,&index1part,&index1interval,
-								    genomesubdir,argv[genomei],IDX_FILESUFFIX,snps_root,
-								    required_index1part,required_index1interval,
-								    /*offsets_only_p*/false)) == NULL) {
-      FREE(genomesubdir);
-      if ((sourcedir = strtok(NULL,",")) != NULL) {
-	genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[genomei])+1)*sizeof(char));
-	sprintf(genomesubdir,"%s/%s",sourcedir,argv[genomei]);
-      }
-    }
-
-    if (sourcedir == NULL) {
-      fprintf(stderr,"Could not find indexdb file for %s in any of the given sourcedirs\n",argv[genomei]);
+    source_genome = MALLOC((strlen(argv[genomei])+1)*sizeof(char));
+    strcpy(source_genome,argv[genomei]);
+    source_dbroot = basename(source_genome);
+
+    if ((ifilenames = Indexdb_get_filenames(&compression_type,&index1part,&index1interval,
+					    /*genomesubdir*/source_genome,/*fileroot*/source_dbroot,
+					    IDX_FILESUFFIX,snps_root,required_index1part,required_index1interval,
+					    /*offsets_only_p*/false)) == NULL) {
+      fprintf(stderr,"Could not find indexdb file for the input genome %s\n",argv[genomei]);
+      exit(9);
     } else {
       if (required_index1part == 0) {
 	required_index1part = index1part;
@@ -776,21 +747,21 @@ main (int argc, char *argv[]) {
 	required_index1interval = index1interval;
       }
       if ((indexdbs[genomei] = Indexdb_new_genome(&index1part,&index1interval,
-						  genomesubdir,argv[genomei],IDX_FILESUFFIX,/*snps_root*/NULL,
+						  /*genomesubdir*/source_genome,/*fileroot*/source_dbroot,
+						  IDX_FILESUFFIX,/*snps_root*/NULL,
 						  required_index1part,required_index1interval,
 						  /*expand_offsets_p*/false,/*offsetsstrm_access*/USE_MMAP_ONLY,
 						  /*positions_access*/USE_MMAP_ONLY,/*sharedp*/false,
 						  /*multiple_sequences_p*/false,/*preload_shared_memory_p*/false,
 						  /*unload_shared_memory_p*/false)) == NULL) {
-	fprintf(stderr,"Cannot open indexdb, kmer %d, interval %d for %s\n",
+	fprintf(stderr,"Could not open indexdb, kmer %d, interval %d for %s\n",
 		required_index1part,required_index1interval,argv[genomei]);
 	exit(9);
       }
       Indexdb_filenames_free(&ifilenames);
-      FREE(genomesubdir);
+      FREE(source_genome);
     }
   }
-  FREE(sourcedirs_copy);
 
 
   Indexdb_setup(index1part);
@@ -926,15 +897,13 @@ main (int argc, char *argv[]) {
 static void
 print_program_usage () {
   fprintf(stdout,"\
-Usage: indexdb_cat [OPTIONS...] -d <genome>\n\
+Usage: indexdb_cat [OPTIONS...] -d </path/to/genome>\n\
 \n\
 ");
 
   /* Input options */
   fprintf(stdout,"Options (must include -d)\n");
   fprintf(stdout,"\
-  -F, --sourcedirs=directories   Directory or directories where to read input index files\n\
-                                   May specify multiple directories by separating them with a comma\n\
   -D, --destdir=directory        Directory where to write cmet index files (default is\n\
                                    value of -F, if provided; otherwise the value of the\n\
                                    GMAP genome directory specified at compile time)\n\


=====================================
src/output.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: output.c 219218 2019-05-12 22:25:48Z twu $";
+static char rcsid[] = "$Id: output.c 222194 2020-03-23 13:44:44Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -912,6 +912,20 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
 	FPRINTF(fp,"</path>\n");
       }
 
+    } else if (printtype == MASK_INTRONS) {
+      /* Print only best path */
+      PUTC('>',fp);
+      Sequence_print_header(fp,headerseq,checksump);
+      Pair_print_mask_introns(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+			      Stage3_chrlength(stage3array[0]),wraplength,/*include_utr_p*/false);
+
+    } else if (printtype == MASK_UTR_INTRONS) {
+      /* Print only best path */
+      PUTC('>',fp);
+      Sequence_print_header(fp,headerseq,checksump);
+      Pair_print_mask_introns(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+			      Stage3_chrlength(stage3array[0]),wraplength,/*include_utr_p*/true);
+
     } else if (printtype == CDNA) {
       for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
 	PUTC('>',fp);


=====================================
src/output.h
=====================================
@@ -1,11 +1,11 @@
-/* $Id: output.h 218147 2019-01-16 21:28:41Z twu $ */
+/* $Id: output.h 222194 2020-03-23 13:44:44Z twu $ */
 #ifndef OUTPUT_INCLUDED
 #define OUTPUT_INCLUDED
 
 typedef enum {STD_OUTPUT, SAM_OUTPUT, M8_OUTPUT} Outputtype_T;
 
 typedef enum {SIMPLE, SUMMARY, ALIGNMENT, COMPRESSED, CONTINUOUS, CONTINUOUS_BY_EXON,
-	      EXONS_CDNA, EXONS_GENOMIC, CDNA, PROTEIN_GENOMIC,
+	      EXONS_CDNA, EXONS_GENOMIC, MASK_INTRONS, MASK_UTR_INTRONS, CDNA, PROTEIN_GENOMIC,
 	      PSL_NT, PSL_PRO, GFF3_GENE, GFF3_MATCH_CDNA, GFF3_MATCH_EST, BEDPE,
 	      SAM, COORDS, SPLICESITES, INTRONS, MAP_RANGES, MAP_EXONS} Printtype_T;
 


=====================================
src/pair.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 221673 2020-02-11 16:17:58Z twu $";
+static char rcsid[] = "$Id: pair.c 222194 2020-03-23 13:44:44Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -9519,6 +9519,86 @@ Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accessio
 }
 
 
+static int
+print_Ns (Filestring_T fp, int column, int n, int wraplength) {
+  int i;
+
+  for (i = 0; i < n; i++) {
+    PUTC('N',fp);
+    if (++column % wraplength == 0) {
+      PUTC('\n',fp);
+      column = 0;
+    }
+  }
+
+  return column;
+}
+
+
+void
+Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs,
+			 Chrpos_T chrlength, int wraplength, bool include_utr_p) {
+  int exoni = 0, column = 0, i;
+  bool in_exon = false;
+  struct T *ptr = pairs, *this = NULL;
+  Chrpos_T exon_genomestart = 0, exon_genomeend;
+  Chrpos_T last_genomepos = (Chrpos_T) -1;
+
+  assert(pairs != NULL);
+  if (include_utr_p == true) {
+    column = print_Ns(fp,column,pairs->genomepos,wraplength);
+  }
+
+  for (i = 0; i < npairs; i++) {
+    /* prev = this; */
+    this = ptr++;
+
+    if (this->gapp) {
+      if (in_exon == true) {
+	/* Beginning of gap */
+	exon_genomeend = last_genomepos + ONEBASEDP;
+	in_exon = false;
+      }
+    } else if (this->comp == INTRONGAP_COMP) {
+      /* Do nothing */
+    } else {
+      /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP, 
+	 SHORTGAP_COMP, or MISMATCH_COMP */
+      if (in_exon == false) {
+	exoni++;
+	if (exoni > 1) {
+	  exon_genomestart = this->genomepos + ONEBASEDP;
+	  column = print_Ns(fp,column,exon_genomestart - exon_genomeend - 1,wraplength);
+	}
+	
+	in_exon = true;
+      }
+      if (this->genome != ' ') {
+	PUTC(this->genome,fp);
+	if (++column % wraplength == 0) {
+	  PUTC('\n',fp);
+	  column = 0;
+	}
+      }
+    }
+  
+    if (this->genome != ' ') {
+      last_genomepos = this->genomepos;
+    }
+  }
+  
+  if (include_utr_p == true) {
+    column = print_Ns(fp,column,chrlength - last_genomepos - 1,wraplength);
+  }
+
+  if (column != 0) {
+    PUTC('\n',fp);
+  }
+
+  return;
+}
+
+
 #if 0
 /* goal_start < goal_end */
 Chrpos_T


=====================================
src/pair.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: pair.h 221628 2020-02-04 22:28:32Z twu $ */
+/* $Id: pair.h 222194 2020-03-23 13:44:44Z twu $ */
 #ifndef PAIR_INCLUDED
 #define PAIR_INCLUDED
 
@@ -279,6 +279,10 @@ Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *acce
 extern void
 Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession,
 		    int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
+extern void
+Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs,
+			 Chrpos_T chrlength, int wraplength, bool include_utr_p);
+
 
 extern int
 Pair_nmatches_posttrim (int *max_match_length, List_T pairs, int pos5, int pos3);


=====================================
src/regiondb-write.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: regiondb-write.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: regiondb-write.c 222312 2020-04-05 16:57:52Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -559,12 +559,13 @@ Region_remainder (Region_T current, Region_T buffer, int shift, size_t offsets_s
 }
 
 
+/* A region has offsets_size (4^6 = 4096) offsets, followed by REGION_LENGTH positions */
 void
 Regiondb_cat (FILE *out, char **files, Univcoord_T *genomelengths, int nfiles, Width_T region1part) {
   FILE *fp;
   char *file;
   size_t offsets_size = 0;
-  int current_pos, bufferlen;
+  int current_pos = 0, bufferlen;
   Region_T current, buffer, temp;
 #ifdef DEBUG1
   int k;


=====================================
src/stage3hr.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 220326 2019-09-12 20:40:15Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 222150 2020-03-13 22:28:19Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -3636,6 +3636,9 @@ Stage3end_new_terminal (int *found_score_overall, int *found_score_within_trims,
   /* There is no junctions_HtoL field */
   /* Do not use junctions after this */
 
+  new->querystart_chrbound = Substring_querystart_chrbound(substring);
+  new->queryend_chrbound = Substring_queryend_chrbound(substring);
+
 #if 0
   /* No need to reverse for a single substring */
   if (gplusp == true) {
@@ -3847,6 +3850,8 @@ Stage3end_new_precomputed (int *found_score_overall, int *found_score_within_tri
 
   substring1 = (Substring_T) List_head(new->substrings_1toN);
   substringN = (Substring_T) List_head(new->substrings_Nto1);
+  new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+  new->queryend_chrbound = Substring_queryend_chrbound(substringN);
 
   genomicstart = Substring_genomicstart(substring1);
   genomicend = Substring_genomicend(substringN); /* DOESN'T WORK FOR AMBIGUOUS */
@@ -4666,6 +4671,8 @@ Stage3end_new_substrings (int *found_score_overall, int *found_score_within_trim
 
   substring1 = (Substring_T) List_head(new->substrings_1toN);
   substringN = (Substring_T) List_head(new->substrings_Nto1);
+  new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+  new->queryend_chrbound = Substring_queryend_chrbound(substringN);
 
   genomicstart = Substring_genomicstart(substring1);
   genomicend = Substring_genomicend(substringN); /* DOESN'T WORK FOR AMBIGUOUS */
@@ -4981,6 +4988,9 @@ Stage3end_new_substitution (int *found_score_overall, int *found_score_within_tr
   new->junctions_1toN = (List_T) NULL;
   new->junctions_Nto1 = (List_T) NULL;
   
+  new->querystart_chrbound = Substring_querystart_chrbound(substring);
+  new->queryend_chrbound = Substring_queryend_chrbound(substring);
+
   new->transcripts = (List_T) NULL;
   new->transcripts_other = (List_T) NULL;
   
@@ -5249,6 +5259,10 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
   assert(Substring_querystart(List_head(new->substrings_1toN)) < Substring_querystart(List_head(new->substrings_Nto1)));
   /* Done assigning substrings */
 
+  substring1 = (Substring_T) List_head(new->substrings_1toN);
+  substringN = (Substring_T) List_head(new->substrings_Nto1);
+  new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+  new->queryend_chrbound = Substring_queryend_chrbound(substringN);
 
   if (new->chrnum != 0) {
     /* Ordinary splice.  No need to distinguish effective_chrnum and other_chrnum */
@@ -5257,8 +5271,6 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
     new->other_chrnum = 0;
 
     /* Define coordinates as usual */
-    substring1 = (Substring_T) List_head(new->substrings_1toN);
-    substringN = (Substring_T) List_head(new->substrings_Nto1);
     new->genomicstart = Substring_genomicstart(substring1);
     new->genomicend = Substring_genomicend(substringN);
     new->plusp = Substring_plusp(substring1);
@@ -5266,12 +5278,12 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
   } else {
     /* Translocation.  Concordant substring is the inner one */
     if (first_read_p == true) {
-      substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
-      substring_other = (Substring_T) List_head(new->substrings_1toN);
+      substring_for_concordance = substringN;  /* (Substring_T) List_head(new->substrings_Nto1); */
+      substring_other = substring1;  /* (Substring_T) List_head(new->substrings_1toN); */
       debug0(printf("Since first read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
     } else {
-      substring_for_concordance = (Substring_T) List_head(new->substrings_1toN);
-      substring_other = (Substring_T) List_head(new->substrings_Nto1);
+      substring_for_concordance = substring1;  /* (Substring_T) List_head(new->substrings_1toN); */
+      substring_other = substringN;  /* (Substring_T) List_head(new->substrings_Nto1); */
       debug0(printf("Since second read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
     }
       
@@ -5282,8 +5294,6 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
     new->genomicstart = Substring_genomicstart(substring_for_concordance);
     new->genomicend = Substring_genomicend(substring_for_concordance);
     new->plusp = Substring_plusp(substring_for_concordance);
-      
-
   }
 
 #ifdef DEBUG0
@@ -5427,7 +5437,7 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
   T new;
   Substring_T substring_for_concordance; /* always the inner substring */
   Substring_T substring_other;		 /* the outer substring */
-  Substring_T substring;
+  Substring_T substring, substring1, substringN;
   Substring_T donor, acceptor;
   Junction_T junction;
 
@@ -5532,14 +5542,19 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
   /* Done assigning substrings */
 
 
+  substring1 = (Substring_T) List_head(new->substrings_1toN);
+  substringN = (Substring_T) List_head(new->substrings_Nto1);
+  new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+  new->queryend_chrbound = Substring_queryend_chrbound(substringN);
+
   /* Translocation.  Concordant substring is the inner one */
   if (first_read_p == true) {
-    substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
-    substring_other = (Substring_T) List_head(new->substrings_1toN);
+    substring_for_concordance = substringN;  /* (Substring_T) List_head(new->substrings_Nto1); */
+    substring_other = substring1;  /* (Substring_T) List_head(new->substrings_1toN); */
     debug0(printf("Since first read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
   } else {
-    substring_for_concordance = (Substring_T) List_head(new->substrings_1toN);
-    substring_other = (Substring_T) List_head(new->substrings_Nto1);
+    substring_for_concordance = substring1;  /* (Substring_T) List_head(new->substrings_1toN); */
+    substring_other = substringN;  /* (Substring_T) List_head(new->substrings_Nto1); */
     debug0(printf("Since second read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
   }
       
@@ -6248,7 +6263,7 @@ Stage3end_filter (List_T hits, Hitlistpool_T hitlistpool, int max_mismatches, in
       printf("Comparing score %d against max_mismatches %d, and coverage %d against min_coverage %d\n",
 	     hit->score,max_mismatches,hit->querylength - hit->trim_querystart - hit->trim_queryend,min_coverage);
 #endif
-      if (hit->score_overall > max_mismatches) {
+      if (hit->score_overall > max_mismatches + hit->querystart_chrbound + (hit->querylength - hit->queryend_chrbound)) {
 	Stage3end_free(&hit);
       } else if (hit->querylength - hit->trim_querystart - hit->trim_queryend < min_coverage) {
 	Stage3end_free(&hit);
@@ -6264,7 +6279,7 @@ Stage3end_filter (List_T hits, Hitlistpool_T hitlistpool, int max_mismatches, in
       printf("Comparing score_ignore_trim %d against max_mismatches %d, and coverage %d against min_coverage %d\n",
 	     hit->score_ignore_trim,max_mismatches,hit->querylength - hit->trim_querystart - hit->trim_queryend,min_coverage);
 #endif
-      if (hit->score_within_trims > max_mismatches) {
+      if (hit->score_within_trims > max_mismatches + hit->querystart_chrbound + (hit->querylength - hit->queryend_chrbound)) {
 	Stage3end_free(&hit);
       } else if (hit->querylength - hit->trim_querystart - hit->trim_queryend < min_coverage) {
 	Stage3end_free(&hit);


=====================================
src/stage3hrdef.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: stage3hrdef.h 221582 2020-01-30 21:28:29Z twu $ */
+/* $Id: stage3hrdef.h 222150 2020-03-13 22:28:19Z twu $ */
 #ifndef STAGE3HRDEF_INCLUDED
 #define STAGE3HRDEF_INCLUDED
 
@@ -47,6 +47,8 @@ struct T {
   Univcoord_T chrhigh;
   Chrpos_T chrlength;
 
+  int querystart_chrbound;
+  int queryend_chrbound;
   int querylength;		/* Needed for overlap and pairlength calculations */
   int querylength_adj;		/* Adjusted for insertions */
 


=====================================
src/substring.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 220326 2019-09-12 20:40:15Z twu $";
+static char rcsid[] = "$Id: substring.c 222150 2020-03-13 22:28:19Z twu $";
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
@@ -261,6 +261,10 @@ struct T {
   int queryend_pretrim;		/* From original invocation, before trimming */
   int querystart;		/* For part that aligns to genome, post-trim */
   int queryend;
+
+  int querystart_chrbound;	/* Bound on alignment if this were substring1 */
+  int queryend_chrbound;	/* Bound on alignment if this were substringN */
+
   int amb_splice_pos;		/* Used for ambiguous substrings.  Based on left, not querystart */
   int querylength;
 
@@ -1707,9 +1711,32 @@ Substring_new (int nmismatches, Univcoord_T left, int querystart, int queryend,
   if (plusp == true) {
     new->genomicstart = left;
     new->genomicend = left + querylength;
+
+    if (left + querylength > chroffset + querylength) {
+      new->querystart_chrbound = 0;
+    } else {
+      new->querystart_chrbound = chroffset - left;
+    }
+    if (left + querylength < chrhigh) {
+      new->queryend_chrbound = querylength;
+    } else {
+      new->queryend_chrbound = chrhigh - left;
+    }
+
   } else {
     new->genomicstart = left + querylength;
     new->genomicend = left;
+
+    if (left + querylength < chrhigh) {
+      new->querystart_chrbound = 0;
+    } else {
+      new->querystart_chrbound = (left + querylength) - chrhigh;
+    }
+    if (left + querylength > chroffset + querylength) {
+      new->queryend_chrbound = querylength;
+    } else {
+      new->queryend_chrbound = (left + querylength) - chroffset;
+    }
   }
 
   new->trim_querystart_splicep = splice_querystart_p;
@@ -2621,9 +2648,32 @@ Substring_new_simple (int nmismatches, Univcoord_T left, int querystart, int que
   if (plusp == true) {
     new->genomicstart = left;
     new->genomicend = left + querylength;
+
+    if (left + querylength > chroffset + querylength) {
+      new->querystart_chrbound = 0;
+    } else {
+      new->querystart_chrbound = chroffset - left;
+    }
+    if (left + querylength < chrhigh) {
+      new->queryend_chrbound = querylength;
+    } else {
+      new->queryend_chrbound = chrhigh - left;
+    }
+
   } else {
     new->genomicstart = left + querylength;
     new->genomicend = left;
+
+    if (left + querylength < chrhigh) {
+      new->querystart_chrbound = 0;
+    } else {
+      new->querystart_chrbound = (left + querylength) - chrhigh;
+    }
+    if (left + querylength > chroffset + querylength) {
+      new->queryend_chrbound = querylength;
+    } else {
+      new->queryend_chrbound = (left + querylength) - chroffset;
+    }
   }
 
   new->amb_splice_pos = 0;
@@ -2790,6 +2840,9 @@ Substring_new_alts_D (int querystart, int queryend, int splice_pos, int querylen
     new->genomicend = Univcoord_max(alts_coords,alts_ncoords);
   }
 
+  new->querystart_chrbound = 0;
+  new->queryend_chrbound = querylength;
+
   new->start_endtype = END;
   new->end_endtype = END;
 
@@ -2899,6 +2952,9 @@ Substring_new_alts_A (int querystart, int queryend, int splice_pos, int querylen
     new->genomicend = Univcoord_max(alts_coords,alts_ncoords);
   }
 
+  new->querystart_chrbound = 0;
+  new->queryend_chrbound = querylength;
+
   new->start_endtype = END;
   new->end_endtype = END;
 
@@ -3524,6 +3580,17 @@ Substring_querylength (T this) {
   return this->querylength;
 }
 
+int
+Substring_querystart_chrbound (T this) {
+  return this->querystart_chrbound;
+}
+
+int
+Substring_queryend_chrbound (T this) {
+  return this->queryend_chrbound;
+}
+
+
 int
 Substring_match_length (T this) {
   if (this == NULL) {
@@ -3893,6 +3960,9 @@ Substring_copy (T old) {
     new->queryend_pretrim = old->queryend_pretrim;
     new->querystart = old->querystart;
     new->queryend = old->queryend;
+    new->querystart_chrbound = old->querystart_chrbound;
+    new->queryend_chrbound = old->queryend_chrbound;
+
     new->amb_splice_pos = old->amb_splice_pos;
     new->querylength = old->querylength;
 


=====================================
src/substring.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: substring.h 220326 2019-09-12 20:40:15Z twu $ */
+/* $Id: substring.h 222150 2020-03-13 22:28:19Z twu $ */
 #ifndef SUBSTRING_INCLUDED
 #define SUBSTRING_INCLUDED
 
@@ -215,6 +215,11 @@ extern int
 Substring_queryend (T this);
 extern int
 Substring_querylength (T this);
+extern int
+Substring_querystart_chrbound (T this);
+extern int
+Substring_queryend_chrbound (T this);
+
 extern int
 Substring_match_length (T this);
 extern int


=====================================
util/gmap_cat.pl.in
=====================================
@@ -10,11 +10,11 @@ my $bindir = "@BINDIR@";   # dirname(__FILE__)
 
 use IO::File;
 use Getopt::Long;
+use File::Basename;
 
 Getopt::Long::Configure(qw(no_auto_abbrev no_ignore_case_always));
 
 GetOptions(
-    'F|sourcedirs=s' => \$sourcedirs,   # source directories
     'D|dir=s' => \$destdir,	# destination directory
     'd|db=s' => \$dbname,	# genome name
     'n|names=s' => \$contig_name_file,   # substitute contig names
@@ -41,12 +41,7 @@ if (!defined($dbname)) {
     }
 }
 
-if (!defined($sourcedirs)) {
-    print STDERR "Source directories not defined with -F, so using the default directory $gmapdb\n";
-    @sourcedirs = ($gmapdb);
-} else {
-    @sourcedirs = split ",",$sourcedirs;
-}
+
 if (!defined($destdir) || $destdir !~ /\S/) {
     print STDERR "Destination directory not defined with -D flag, so writing to $gmapdb\n";
     $destdir = $gmapdb;
@@ -111,13 +106,10 @@ $IIT = new IO::File(" | $bindir/iit_store -1 -o $dbdir/$dbname.chromosome");
 %seenp = ();
 $genomelen = 0;
 foreach $input_genome (@input_genomes) {
-    $i = 0;
-    while ($i <= $#sourcedirs &&
-	   !defined($FP = new IO::File("$sourcedirs[$i]/$input_genome/$input_genome.chromosome"))) {
-	$i++;
-    }
-    if (!defined($FP)) {
-	die "Cannot open $input_genome/$input_genome.chromosome in any of the given sourcedirs";
+    $input_dbroot = basename($input_genome);
+    if (!defined($FP = new IO::File("$input_genome/$input_dbroot.chromosome"))) {
+	print STDERR "Expecting full paths to input genomes";
+	die "Cannot open $input_genome/$input_dbroot.chromosome in any of the given sourcedirs";
     }
 
     while (defined($line = <$FP>)) {
@@ -169,7 +161,7 @@ close($OUT);
 
 # (2) Merge genomecomp and genomebits
 print STDERR "Merging genome strings\n";
-$cmd = "\"$bindir/gmapindex\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" -C " . join(" ", at input_genomes);
+$cmd = "\"$bindir/gmapindex\" -D \"$dbdir\" -d \"$dbname\" -C " . join(" ", at input_genomes);
 print STDERR "Running $cmd\n";
 if (($rc = system($cmd)) != 0) {
     die "$cmd failed with return code $rc";
@@ -182,7 +174,7 @@ unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile);
 
 # (3) Merge indexdbs (offsets and positions)
 print STDERR "Merging offsets and positions\n";
-$cmd = "\"$bindir/indexdb_cat\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" " . join(" ", at input_genomes);
+$cmd = "\"$bindir/indexdb_cat\" -D \"$dbdir\" -d \"$dbname\" " . join(" ", at input_genomes);
 print STDERR "Running $cmd\n";
 if (($rc = system($cmd)) != 0) {
     die "$cmd failed with return code $rc";
@@ -191,7 +183,7 @@ if (($rc = system($cmd)) != 0) {
 
 # (4) Merge regiondb
 print STDERR "Merging regional hash tables\n";
-$cmd = "\"$bindir/gmapindex\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" -R " . join(" ", at input_genomes);
+$cmd = "\"$bindir/gmapindex\" -D \"$dbdir\" -d \"$dbname\" -R " . join(" ", at input_genomes);
 print STDERR "Running $cmd\n";
 if (($rc = system($cmd)) != 0) {
     die "$cmd failed with return code $rc";
@@ -260,11 +252,11 @@ sub print_usage {
 gmap_add: Concatenates gmap databases to be used by GMAP or GSNAP.
 Part of GMAP package, version $package_version.
 
-Usage: gmap_cat [options...] -d <output_genome_name> <input_genome_names...>
+Usage: gmap_cat [options...] -d <output_genome_name> <path/to/genome_dir...>
+
+Note: Input genomes should contain a full or local path for the directory containing the genome index
 
 Options:
-    -F, --sourcedirs=STRING   Directory or directories for input genome indices.  May specify multiple directories by
-                                separating them with a comma.  For each input genome, they will be searched in the given order.
     -D, --dir=STRING          Destination directory for output genome index
     -d, --db=STRING           Output genome name
 



View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/6351a1f254104267b0a877812cd1eca7a8d873ac...580578c81a6f0a4856a08239759752a63be98e0e

-- 
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/6351a1f254104267b0a877812cd1eca7a8d873ac...580578c81a6f0a4856a08239759752a63be98e0e
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200411/70766592/attachment-0001.html>


More information about the debian-med-commit mailing list