[med-svn] [Git][med-team/gmap][upstream] 2 commits: New upstream version 2020-04-08+ds
Andreas Tille
gitlab at salsa.debian.org
Sat Apr 11 20:59:00 BST 2020
Andreas Tille pushed to branch upstream at Debian Med / gmap
Commits:
ea8a10e9 by Andreas Tille at 2020-04-11T07:49:13+02:00
New upstream version 2020-04-08+ds
- - - - -
580578c8 by Andreas Tille at 2020-04-11T18:20:40+02:00
New upstream version 2020-04-08+ds1
- - - - -
30 changed files:
- ChangeLog
- VERSION
- configure.ac
- src/access.c
- src/access.h
- src/compress-write.c
- src/compress-write.h
- src/dynprog_end.c
- src/get-genome.c
- − src/getopt.c
- − src/getopt.h
- − src/getopt1.c
- src/gmap.c
- src/gmapindex.c
- src/gsnap.c
- src/iit-read-univ.c
- src/iit-read.c
- src/iit_get.c
- src/iit_store.c
- src/indexdb-cat.c
- src/output.c
- src/output.h
- src/pair.c
- src/pair.h
- src/regiondb-write.c
- src/stage3hr.c
- src/stage3hrdef.h
- src/substring.c
- src/substring.h
- util/gmap_cat.pl.in
Changes:
=====================================
ChangeLog
=====================================
The diff for this file was not included because it is too large.
=====================================
VERSION
=====================================
@@ -1 +1 @@
-2020-03-12
\ No newline at end of file
+2020-04-08
\ No newline at end of file
=====================================
configure.ac
=====================================
@@ -390,7 +390,7 @@ else
AX_CPUID_NON_INTEL
fi
-AX_EXT
+AX_EXT # Sets SIMD_CFLAGS, and HAVE_ALTIVEC,HAVE_MMX,HAVE_SSE,HAVE_SSE2,...
if test "x$ax_cv_want_simd" = xno; then
compile_level=none
elif test "x$ax_make_avx512bw" = xyes; then
=====================================
src/access.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: access.c 222113 2020-03-06 18:05:33Z twu $";
+static char rcsid[] = "$Id: access.c 222390 2020-04-10 12:44:01Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1014,8 +1014,8 @@ Access_mmap (int *fd, size_t *len, double *seconds, char *filename, bool randomp
} else {
*len = length;
memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
- |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+ |MAP_PRIVATE
#endif
#ifdef HAVE_MMAP_MAP_FILE
|MAP_FILE
@@ -1086,8 +1086,8 @@ Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool r
memory = (void *) NULL;
} else {
memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
- |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+ |MAP_PRIVATE
#endif
#ifdef HAVE_MMAP_MAP_FILE
|MAP_FILE
@@ -1127,7 +1127,8 @@ Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool r
#endif
-
+#if 0
+/* All programs are read-only */
#ifdef HAVE_MMAP
/* Returns NULL if mmap fails. Bigendian conversion required */
#ifdef HAVE_CADDR_T
@@ -1196,7 +1197,11 @@ Access_mmap_rw (int *fd, size_t *len, char *filename, bool randomp) {
return memory;
}
#endif
+#endif /* if 0 */
+
+#if 0
+/* All programs are read-only */
#ifdef HAVE_MMAP
/* Returns NULL if mmap fails. Bigendian conversion required */
#ifdef HAVE_CADDR_T
@@ -1263,7 +1268,7 @@ Access_mmap_offset_rw (int *remainder, int fd, size_t offset, size_t length, boo
return memory;
}
#endif
-
+#endif /* if 0 */
@@ -1314,8 +1319,8 @@ Access_mmap_and_preload (int *fd, size_t *len, int *npages, double *seconds, cha
Stopwatch_start(stopwatch = Stopwatch_new());
memory = mmap(NULL,length,PROT_READ,0
-#ifdef HAVE_MMAP_MAP_SHARED
- |MAP_SHARED
+#ifdef HAVE_MMAP_MAP_PRIVATE
+ |MAP_PRIVATE
#endif
#ifdef HAVE_MMAP_MAP_FILE
|MAP_FILE
=====================================
src/access.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: access.h 222113 2020-03-06 18:05:33Z twu $ */
+/* $Id: access.h 222390 2020-04-10 12:44:01Z twu $ */
#ifndef ACCESS_INCLUDED
#define ACCESS_INCLUDED
#ifdef HAVE_CONFIG_H
@@ -73,20 +73,6 @@ extern void *
#endif
Access_mmap_offset (int *remainder, int fd, size_t offset, size_t length, bool randomp);
-#ifdef HAVE_CADDR_T
-extern caddr_t
-#else
-extern void *
-#endif
-Access_mmap_rw (int *fd, size_t *len, char *filename, bool randomp);
-
-#ifdef HAVE_CADDR_T
-extern caddr_t
-#else
-extern void *
-#endif
-Access_mmap_offset_rw (int *remainder, int fd, size_t offset, size_t length, bool randomp);
-
#ifdef HAVE_CADDR_T
extern caddr_t
#else
=====================================
src/compress-write.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: compress-write.c 221920 2020-02-20 02:05:50Z twu $";
+static char rcsid[] = "$Id: compress-write.c 222312 2020-04-05 16:57:52Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -25,6 +25,8 @@ static char rcsid[] = "$Id: compress-write.c 221920 2020-02-20 02:05:50Z twu $";
#endif
#include "complement.h"
#include "mem.h" /* For Compress_new */
+#include "getline.h"
+
/* Compress_cat */
#ifdef DEBUG1
@@ -526,6 +528,123 @@ Compress_update_memory (int nbadchars, Genomecomp_T *genomecomp, char *gbuffer,
}
+void
+Compress_compress (char **files, int nfiles, bool stdin_p) {
+ Genomecomp_T low = 0U, high = 0U, flags = 0U, carry;
+ Univcoord_T position = 0UL;
+ FILE *fp;
+ char *line, *p;
+ int c;
+ int in_counter = 0, filei;
+
+
+ fprintf(stderr,"Compressing genome");
+
+ if (stdin_p == true) {
+ nfiles = 1;
+ }
+ for (filei = 0; filei < nfiles; filei++) {
+ if (stdin_p == true) {
+ fp = stdin;
+ } else if ((fp = fopen(files[filei],"r")) == NULL) {
+ fprintf(stderr,"Could not open file %s\n",files[filei]);
+ exit(9);
+ }
+
+ while ((line = Getline(fp)) != NULL) {
+ if (line[0] == '>') {
+ /* Skip header */
+ } else {
+ p = line;
+ while ((c = (int) *p++) != '\0') {
+ if (isalpha(c)) {
+ in_counter++;
+
+ carry = high & 3U;
+ high >>= 2;
+ low >>= 2;
+ flags >>= 1;
+ switch (carry) {
+ case 0U: break;
+ case 1U: low |= UINT4_LEFT_C; break;
+ case 2U: low |= UINT4_LEFT_G; break;
+ case 3U: low |= UINT4_LEFT_T; break;
+ default: abort();
+ }
+
+ switch (uppercaseCode[c]) {
+ case 'A': break;
+ case 'C': high |= UINT4_LEFT_C; break;
+ case 'G': high |= UINT4_LEFT_G; break;
+ case 'T': high |= UINT4_LEFT_T; break;
+ case 'N': flags |= UINT4_LEFT_BIT; break;
+ case 'X': high |= UINT4_LEFT_T; flags |= UINT4_LEFT_BIT; break;
+ default:
+ fprintf(stderr,"Non-standard nucleotide %c at position %lu. Using N instead\n",c,position);
+ flags |= UINT4_LEFT_BIT;
+ break;
+ }
+
+ /* 8 is simply bits per byte */
+ if (in_counter == 8 * (int) sizeof(Genomecomp_T)) {
+ FWRITE_UINT(high,stdout);
+ FWRITE_UINT(low,stdout);
+ FWRITE_UINT(flags,stdout);
+
+ low = high = flags = 0U;
+ in_counter = 0;
+ }
+
+ position++;
+ if (position % MONITOR_INTERVAL == 0) {
+ fprintf(stderr,".",position);
+ }
+ }
+ }
+ }
+
+ FREE(line);
+ }
+
+ if (stdin_p == false) {
+ fclose(fp);
+ }
+ }
+
+ if (in_counter > 0) {
+ while (in_counter < 8 * (int) sizeof(Genomecomp_T)) {
+ carry = high & 3U;
+ high >>= 2;
+ low >>= 2;
+ flags >>= 1;
+ switch (carry) {
+ case 0U: break;
+ case 1U: low |= UINT4_LEFT_C; break;
+ case 2U: low |= UINT4_LEFT_G; break;
+ case 3U: low |= UINT4_LEFT_T; break;
+ default: abort();
+ }
+ high |= UINT4_LEFT_T; flags |= UINT4_LEFT_BIT; /* Create X's at end */
+ in_counter++;
+ }
+
+ FWRITE_UINT(high,stdout);
+ FWRITE_UINT(low,stdout);
+ FWRITE_UINT(flags,stdout);
+ }
+
+ /* Plus extra high and low */
+ high = low = 0xFFFFFFFF;
+ FWRITE_UINT(high,stdout);
+ FWRITE_UINT(low,stdout);
+
+ fprintf(stderr,"\n");
+
+ return;
+}
+
+
+
#ifdef HAVE_64_BIT
static inline void
nt_unshuffle (UINT4 *highbits, UINT4 *lowbits, UINT4 high, UINT4 low) {
@@ -713,7 +832,7 @@ void
Compress_cat (FILE *out, char **files, Univcoord_T *genomelengths, int nfiles) {
FILE *fp;
char *file;
- int current_pos, bufferlen, shift;
+ int current_pos = 0, bufferlen, shift;
UINT4 Buffer[3];
#ifdef HAVE_64_BIT
UINT8 current_highlow = 0, buffer_highlow = 0;
=====================================
src/compress-write.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: compress-write.h 221681 2020-02-12 03:28:08Z twu $ */
+/* $Id: compress-write.h 222204 2020-03-23 22:40:15Z twu $ */
#ifndef COMPRESS_WRITE_INCLUDED
#define COMPRESS_WRITE_INCLUDED
@@ -17,6 +17,8 @@ extern int
Compress_update_memory (int nbadchars, Genomecomp_T *genomecomp, char *gbuffer, Univcoord_T startpos,
Univcoord_T endpos);
extern void
+Compress_compress (char **files, int nfiles, bool stdin_p);
+extern void
Compress_unshuffle (FILE *out, FILE *in);
extern void
Compress_unshuffle_bits128 (FILE *out, FILE *in);
=====================================
src/dynprog_end.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: dynprog_end.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: dynprog_end.c 222316 2020-04-05 18:18:36Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -580,7 +580,7 @@ find_best_endpoint_to_queryend_nogaps (int *bestr, int *bestc, int rlength, int
static char complCode[128] = COMPLEMENT_LC;
-#if 0
+#ifdef DEBUG_SIMD
static char
get_genomic_nt (char *g_alt, int genomicpos, Univcoord_T chroffset, Univcoord_T chrhigh,
bool watsonp) {
=====================================
src/get-genome.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: get-genome.c 215487 2018-05-26 03:18:11Z twu $";
+static char rcsid[] = "$Id: get-genome.c 222304 2020-04-02 22:17:13Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -79,6 +79,7 @@ static bool codingp = false;
/* Dump options */
static bool dumpallp = false;
+static bool add_circular_p = false;
static bool stream_chars_p = false;
static bool stream_ints_p = false;
static bool dumpchrp = false;
@@ -117,6 +118,7 @@ static struct option long_options[] = {
/* Dump options */
{"dump", no_argument, 0, 'A'}, /* dumpallp */
+ {"add-circular", no_argument, 0, 0}, /* add_circular_p */
{"stream-chars", no_argument, 0, 0}, /* stream_chars_p */
{"chromosomes", no_argument, 0, 'L'}, /* dumpchrp */
{"forsam", no_argument, 0, 0}, /* dumpchr_forsam_p */
@@ -415,7 +417,7 @@ index_compare (const void *a, const void *b) {
static void
print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, Chrpos_T genomiclength,
- Univ_IIT_T chromosome_iit, bool whole_chromosome_p) {
+ Univ_IIT_T chromosome_iit, bool whole_chromosome_p, bool circular_part_p) {
char *chromosome1, *chromosome2, *ptr, c;
Sequence_T genomicseg, genomicseg_alt, genomicseg_snp;
Chrpos_T chrpos;
@@ -429,6 +431,8 @@ print_sequence (Genome_T genome, Genome_T genomealt, Univcoord_T genomicstart, C
/* Don't print a header */
} else if (header != NULL) {
printf(">%s\n",header);
+ } else if (circular_part_p == true) {
+ /* Don't print a header */
} else if (revcomp == true) {
printf(">");
chromosome1 = Univ_IIT_string_from_position(&chrpos,genomicstart+genomiclength-1,chromosome_iit);
@@ -1279,12 +1283,15 @@ main (int argc, char *argv[]) {
} else if (!strcmp(long_name,"stream-ints")) {
stream_ints_p = true;
+ } else if (!strcmp(long_name,"add-circular")) {
+ add_circular_p = true;
+
} else if (!strcmp(long_name,"coding")) {
codingp = true;
} else {
/* Shouldn't reach here */
- fprintf(stderr,"Don't recognize option %s. For usage, run 'get-genome --help'",long_name);
+ fprintf(stderr,"Don't recognize option %s. For usage, run 'get-genome --help'\n",long_name);
exit(9);
}
break;
@@ -1364,11 +1371,11 @@ main (int argc, char *argv[]) {
if (Parserange_universal(&segment,&revcomp,&whole_chromosome_p,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
- /*whole_chromosome_p*/true);
+ /*whole_chromosome_p*/true,/*circular_part_p*/false);
if (circularp[indx] == true) {
/* Print again, since internal genome represents circular chromosomes twice */
print_sequence(genome,/*genomealt*/NULL,genomicstart,genomiclength,chromosome_iit,
- /*whole_chromosome_p*/true);
+ /*whole_chromosome_p*/true,/*circular_part_p*/true);
}
}
FREE(segment);
@@ -1486,6 +1493,9 @@ main (int argc, char *argv[]) {
chromosome_iit = Univ_IIT_read(iitfile,/*readonlyp*/true,/*add_iit_p*/false);
FREE(iitfile);
+ /* Needed if --with-circular is specified */
+ circularp = Univ_IIT_circularp(&any_circular_p,chromosome_iit);
+
if (snps_root == NULL || print_snps_mode == 0) {
genome = Genome_new(genomesubdir,fileroot,/*snps_root*/NULL,/*genometype*/GENOME_OLIGOS,
uncompressedp,/*access*/USE_MMAP_ONLY,/*sharedp*/false);
@@ -1509,7 +1519,12 @@ main (int argc, char *argv[]) {
if (Parserange_universal(&segment,&revcomp,&whole_chromosome_p,&genomicstart,&genomiclength,&chrstart,&chrend,
&chroffset,&chrlength,with_colon,genomesubdir,fileroot) == true) {
print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
- /*whole_chromosome_p*/true);
+ /*whole_chromosome_p*/true,/*circular_part_p*/false);
+ if (add_circular_p == true && circularp[indx] == true) {
+ /* Print again, since internal genome represents circular chromosomes twice */
+ print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
+ /*whole_chromosome_p*/true,/*circular_part_p*/true);
+ }
}
FREE(segment);
FREE(with_colon);
@@ -1690,7 +1705,7 @@ main (int argc, char *argv[]) {
debug(printf("Query %s parsed as: genomicstart = %llu, genomiclength = %u, revcomp = %d\n",
argv[0],(unsigned long long) genomicstart,genomiclength,revcomp));
print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
- whole_chromosome_p);
+ whole_chromosome_p,/*circular_part_p*/false);
}
FREE(segment);
@@ -1989,7 +2004,7 @@ main (int argc, char *argv[]) {
} else {
print_sequence(genome,genomealt,genomicstart,genomiclength,chromosome_iit,
- /*whole_chromosome_p*/false);
+ /*whole_chromosome_p*/false,/*circular_part_p*/false);
}
FREE(divstring2);
=====================================
src/getopt.c deleted
=====================================
@@ -1,1262 +0,0 @@
-/* Getopt for GNU.
- NOTE: getopt is now part of the C library, so if you don't know what
- "Keep this file name-space clean" means, talk to drepper at gnu.org
- before changing it!
- Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001,2002
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
- Ditto for AIX 3.2 and <stdlib.h>. */
-#ifndef _NO_PROTO
-# define _NO_PROTO
-#endif
-
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if !defined __STDC__ || !__STDC__
-/* This is a separate conditional since some stdc systems
- reject `defined (const)'. */
-# ifndef const
-# define const
-# endif
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-# include <gnu-versions.h>
-# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-# define ELIDE_CODE
-# endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
- to get __GNU_LIBRARY__ defined. */
-#ifdef __GNU_LIBRARY__
-/* Don't include stdlib.h for non-GNU C libraries because some of them
- contain conflicting prototypes for getopt. */
-# include <stdlib.h>
-# include <unistd.h>
-#endif /* GNU C library. */
-
-#ifdef VMS
-# include <unixlib.h>
-# if HAVE_STRING_H - 0
-# include <string.h>
-# endif
-#endif
-
-#ifndef attribute_hidden
-# define attribute_hidden
-#endif
-
-/* This version of `getopt' appears to the caller like standard Unix `getopt'
- but it behaves differently for the user, since it allows the user
- to intersperse the options with the other arguments.
-
- As `getopt' works, it permutes the elements of ARGV so that,
- when it is done, all the options precede everything else. Thus
- all application programs are extended to handle flexible argument order.
-
- Setting the environment variable POSIXLY_CORRECT disables permutation..
- Then the behavior is completely standard.
-
- GNU application programs can use a third alternative mode in which
- they can distinguish the relative order of options and other arguments. */
-
-#include "getopt.h"
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns -1, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-/* 1003.2 says this must be 1 before any call. */
-int optind = 1;
-
-/* Formerly, initialization of getopt depended on optind==0, which
- causes problems with re-calling getopt as programs generally don't
- know that. */
-
-int __getopt_initialized attribute_hidden;
-
-/* The next char to be scanned in the option-element
- in which the last option character we returned was found.
- This allows us to pick up the scan where we left off.
-
- If this is zero, or a null string, it means resume the scan
- by advancing to the next ARGV-element. */
-
-static char *nextchar;
-
-/* Callers store zero here to inhibit the error message
- for unrecognized options. */
-
-int opterr = 1;
-
-/* Set to an option character which was unrecognized.
- This must be initialized on some systems to avoid linking in the
- system's own getopt implementation. */
-
-int optopt = '?';
-
-/* Describe how to deal with options that follow non-option ARGV-elements.
-
- If the caller did not specify anything,
- the default is REQUIRE_ORDER if the environment variable
- POSIXLY_CORRECT is defined, PERMUTE otherwise.
-
- REQUIRE_ORDER means don't recognize them as options;
- stop option processing when the first non-option is seen.
- This is what Unix does.
- This mode of operation is selected by either setting the environment
- variable POSIXLY_CORRECT, or using `+' as the first character
- of the list of option characters.
-
- PERMUTE is the default. We permute the contents of ARGV as we scan,
- so that eventually all the non-options are at the end. This allows options
- to be given in any order, even with programs that were not written to
- expect this.
-
- RETURN_IN_ORDER is an option available to programs that were written
- to expect options and other ARGV-elements in any order and that care about
- the ordering of the two. We describe each non-option ARGV-element
- as if it were the argument of an option with character code 1.
- Using `-' as the first character of the list of option characters
- selects this mode of operation.
-
- The special argument `--' forces an end of option-scanning regardless
- of the value of `ordering'. In the case of RETURN_IN_ORDER, only
- `--' can cause `getopt' to return -1 with `optind' != ARGC. */
-
-static enum
-{
- REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
-} ordering;
-
-/* Value of POSIXLY_CORRECT environment variable. */
-static char *posixly_correct;
-
-#ifdef __GNU_LIBRARY__
-/* We want to avoid inclusion of string.h with non-GNU libraries
- because there are many ways it can cause trouble.
- On some systems, it contains special magic macros that don't work
- in GCC. */
-# include <string.h>
-# define my_index strchr
-#else
-
-# if HAVE_STRING_H
-# include <string.h>
-# else
-# include <strings.h>
-# endif
-
-/* Avoid depending on library functions or files
- whose names are inconsistent. */
-
-#ifndef getenv
-extern char *getenv ();
-#endif
-
-static char *
-my_index (str, chr)
- const char *str;
- int chr;
-{
- while (*str)
- {
- if (*str == chr)
- return (char *) str;
- str++;
- }
- return 0;
-}
-
-/* If using GCC, we can safely declare strlen this way.
- If not using GCC, it is ok not to declare it. */
-#ifdef __GNUC__
-/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
- That was relevant to code that was here before. */
-# if (!defined __STDC__ || !__STDC__) && !defined strlen
-/* gcc with -traditional declares the built-in strlen to return int,
- and has done so at least since version 2.4.5. -- rms. */
-extern int strlen (const char *);
-# endif /* not __STDC__ */
-#endif /* __GNUC__ */
-
-#endif /* not __GNU_LIBRARY__ */
-
-/* Handle permutation of arguments. */
-
-/* Describe the part of ARGV that contains non-options that have
- been skipped. `first_nonopt' is the index in ARGV of the first of them;
- `last_nonopt' is the index after the last of them. */
-
-static int first_nonopt;
-static int last_nonopt;
-
-#ifdef _LIBC
-/* Stored original parameters.
- XXX This is no good solution. We should rather copy the args so
- that we can compare them later. But we must not use malloc(3). */
-extern int __libc_argc;
-extern char **__libc_argv;
-
-/* Bash 2.0 gives us an environment variable containing flags
- indicating ARGV elements that should not be considered arguments. */
-
-# ifdef USE_NONOPTION_FLAGS
-/* Defined in getopt_init.c */
-extern char *__getopt_nonoption_flags;
-
-static int nonoption_flags_max_len;
-static int nonoption_flags_len;
-# endif
-
-# ifdef USE_NONOPTION_FLAGS
-# define SWAP_FLAGS(ch1, ch2) \
- if (nonoption_flags_len > 0) \
- { \
- char __tmp = __getopt_nonoption_flags[ch1]; \
- __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
- __getopt_nonoption_flags[ch2] = __tmp; \
- }
-# else
-# define SWAP_FLAGS(ch1, ch2)
-# endif
-#else /* !_LIBC */
-# define SWAP_FLAGS(ch1, ch2)
-#endif /* _LIBC */
-
-/* Exchange two adjacent subsequences of ARGV.
- One subsequence is elements [first_nonopt,last_nonopt)
- which contains all the non-options that have been skipped so far.
- The other is elements [last_nonopt,optind), which contains all
- the options processed since those non-options were skipped.
-
- `first_nonopt' and `last_nonopt' are relocated so that they describe
- the new indices of the non-options in ARGV after they are moved. */
-
-#if defined __STDC__ && __STDC__
-static void exchange (char **);
-#endif
-
-static void
-exchange (argv)
- char **argv;
-{
- int bottom = first_nonopt;
- int middle = last_nonopt;
- int top = optind;
- char *tem;
-
- /* Exchange the shorter segment with the far end of the longer segment..
- That puts the shorter segment into the right place.
- It leaves the longer segment in the right place overall,
- but it consists of two parts that need to be swapped next. */
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
- /* First make sure the handling of the `__getopt_nonoption_flags'
- string can work normally. Our top argument must be in the range
- of the string. */
- if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
- {
- /* We must extend the array. The user plays games with us and
- presents new arguments. */
- char *new_str = malloc (top + 1);
- if (new_str == NULL)
- nonoption_flags_len = nonoption_flags_max_len = 0;
- else
- {
- memset (__mempcpy (new_str, __getopt_nonoption_flags,
- nonoption_flags_max_len),
- '\0', top + 1 - nonoption_flags_max_len);
- nonoption_flags_max_len = top + 1;
- __getopt_nonoption_flags = new_str;
- }
- }
-#endif
-
- while (top > middle && middle > bottom)
- {
- if (top - middle > middle - bottom)
- {
- /* Bottom segment is the short one. */
- int len = middle - bottom;
- register int i;
-
- /* Swap it with the top part of the top segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[top - (middle - bottom) + i];
- argv[top - (middle - bottom) + i] = tem;
- SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
- }
- /* Exclude the moved bottom segment from further swapping. */
- top -= len;
- }
- else
- {
- /* Top segment is the short one. */
- int len = top - middle;
- register int i;
-
- /* Swap it with the bottom part of the bottom segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[middle + i];
- argv[middle + i] = tem;
- SWAP_FLAGS (bottom + i, middle + i);
- }
- /* Exclude the moved top segment from further swapping. */
- bottom += len;
- }
- }
-
- /* Update records for the slots the non-options now occupy. */
-
- first_nonopt += (optind - last_nonopt);
- last_nonopt = optind;
-}
-
-/* Initialize the internal data when the first call is made. */
-
-#if defined __STDC__ && __STDC__
-static const char *_getopt_initialize (int, char *const *, const char *);
-#endif
-static const char *
-_getopt_initialize (argc, argv, optstring)
- int argc;
- char *const *argv;
- const char *optstring;
-{
- /* Start processing options with ARGV-element 1 (since ARGV-element 0
- is the program name); the sequence of previously skipped
- non-option ARGV-elements is empty. */
-
- first_nonopt = last_nonopt = optind;
-
- nextchar = NULL;
-
- posixly_correct = getenv ("POSIXLY_CORRECT");
-
- /* Determine how to handle the ordering of options and nonoptions. */
-
- if (optstring[0] == '-')
- {
- ordering = RETURN_IN_ORDER;
- ++optstring;
- }
- else if (optstring[0] == '+')
- {
- ordering = REQUIRE_ORDER;
- ++optstring;
- }
- else if (posixly_correct != NULL)
- ordering = REQUIRE_ORDER;
- else
- ordering = PERMUTE;
-
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
- if (posixly_correct == NULL
- && argc == __libc_argc && argv == __libc_argv)
- {
- if (nonoption_flags_max_len == 0)
- {
- if (__getopt_nonoption_flags == NULL
- || __getopt_nonoption_flags[0] == '\0')
- nonoption_flags_max_len = -1;
- else
- {
- const char *orig_str = __getopt_nonoption_flags;
- int len = nonoption_flags_max_len = strlen (orig_str);
- if (nonoption_flags_max_len < argc)
- nonoption_flags_max_len = argc;
- __getopt_nonoption_flags =
- (char *) malloc (nonoption_flags_max_len);
- if (__getopt_nonoption_flags == NULL)
- nonoption_flags_max_len = -1;
- else
- memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
- '\0', nonoption_flags_max_len - len);
- }
- }
- nonoption_flags_len = nonoption_flags_max_len;
- }
- else
- nonoption_flags_len = 0;
-#endif
-
- return optstring;
-}
-
-/* Scan elements of ARGV (whose length is ARGC) for option characters
- given in OPTSTRING.
-
- If an element of ARGV starts with '-', and is not exactly "-" or "--",
- then it is an option element. The characters of this element
- (aside from the initial '-') are option characters. If `getopt'
- is called repeatedly, it returns successively each of the option characters
- from each of the option elements.
-
- If `getopt' finds another option character, it returns that character,
- updating `optind' and `nextchar' so that the next call to `getopt' can
- resume the scan with the following option character or ARGV-element.
-
- If there are no more option characters, `getopt' returns -1.
- Then `optind' is the index in ARGV of the first ARGV-element
- that is not an option. (The ARGV-elements have been permuted
- so that those that are not options now come last.)
-
- OPTSTRING is a string containing the legitimate option characters.
- If an option character is seen that is not listed in OPTSTRING,
- return '?' after printing an error message. If you set `opterr' to
- zero, the error message is suppressed but we still return '?'.
-
- If a char in OPTSTRING is followed by a colon, that means it wants an arg,
- so the following text in the same ARGV-element, or the text of the following
- ARGV-element, is returned in `optarg'. Two colons mean an option that
- wants an optional arg; if there is text in the current ARGV-element,
- it is returned in `optarg', otherwise `optarg' is set to zero.
-
- If OPTSTRING starts with `-' or `+', it requests different methods of
- handling the non-option ARGV-elements.
- See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
-
- Long-named options begin with `--' instead of `-'.
- Their names may be abbreviated as long as the abbreviation is unique
- or is an exact match for some defined option. If they have an
- argument, it follows the option name in the same ARGV-element, separated
- from the option name by a `=', or else the in next ARGV-element.
- When `getopt' finds a long-named option, it returns 0 if that option's
- `flag' field is nonzero, the value of the option's `val' field
- if the `flag' field is zero.
-
- The elements of ARGV aren't really const, because we permute them.
- But we pretend they're const in the prototype to be compatible
- with other systems.
-
- LONGOPTS is a vector of `struct option' terminated by an
- element containing a name which is zero.
-
- LONGIND returns the index in LONGOPT of the long-named option found.
- It is only valid when a long-named option has been found by the most
- recent call.
-
- If LONG_ONLY is nonzero, '-' as well as '--' can introduce
- long-named options. */
-
-int
-_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
- int argc;
- char *const *argv;
- const char *optstring;
- const struct option *longopts;
- int *longind;
- int long_only;
-{
- int print_errors = opterr;
- if (optstring[0] == ':')
- print_errors = 0;
-
- if (argc < 1)
- return -1;
-
- optarg = NULL;
-
- if (optind == 0 || !__getopt_initialized)
- {
- if (optind == 0)
- optind = 1; /* Don't scan ARGV[0], the program name. */
- optstring = _getopt_initialize (argc, argv, optstring);
- __getopt_initialized = 1;
- }
-
- /* Test whether ARGV[optind] points to a non-option argument.
- Either it does not have option syntax, or there is an environment flag
- from the shell indicating it is not an option. The later information
- is only used when the used in the GNU libc. */
-#if defined _LIBC && defined USE_NONOPTION_FLAGS
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
- || (optind < nonoption_flags_len \
- && __getopt_nonoption_flags[optind] == '1'))
-#else
-# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
-#endif
-
- if (nextchar == NULL || *nextchar == '\0')
- {
- /* Advance to the next ARGV-element. */
-
- /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
- moved back by the user (who may also have changed the arguments). */
- if (last_nonopt > optind)
- last_nonopt = optind;
- if (first_nonopt > optind)
- first_nonopt = optind;
-
- if (ordering == PERMUTE)
- {
- /* If we have just processed some options following some non-options,
- exchange them so that the options come first. */
-
- if (first_nonopt != last_nonopt && last_nonopt != optind)
- exchange ((char **) argv);
- else if (last_nonopt != optind)
- first_nonopt = optind;
-
- /* Skip any additional non-options
- and extend the range of non-options previously skipped. */
-
- while (optind < argc && NONOPTION_P)
- optind++;
- last_nonopt = optind;
- }
-
- /* The special ARGV-element `--' means premature end of options.
- Skip it like a null option,
- then exchange with previous non-options as if it were an option,
- then skip everything else like a non-option. */
-
- if (optind != argc && !strcmp (argv[optind], "--"))
- {
- optind++;
-
- if (first_nonopt != last_nonopt && last_nonopt != optind)
- exchange ((char **) argv);
- else if (first_nonopt == last_nonopt)
- first_nonopt = optind;
- last_nonopt = argc;
-
- optind = argc;
- }
-
- /* If we have done all the ARGV-elements, stop the scan
- and back over any non-options that we skipped and permuted. */
-
- if (optind == argc)
- {
- /* Set the next-arg-index to point at the non-options
- that we previously skipped, so the caller will digest them. */
- if (first_nonopt != last_nonopt)
- optind = first_nonopt;
- return -1;
- }
-
- /* If we have come to a non-option and did not permute it,
- either stop the scan or describe it to the caller and pass it by. */
-
- if (NONOPTION_P)
- {
- if (ordering == REQUIRE_ORDER)
- return -1;
- optarg = argv[optind++];
- return 1;
- }
-
- /* We have found another option-ARGV-element.
- Skip the initial punctuation. */
-
- nextchar = (argv[optind] + 1
- + (longopts != NULL && argv[optind][1] == '-'));
- }
-
- /* Decode the current option-ARGV-element. */
-
- /* Check whether the ARGV-element is a long option.
-
- If long_only and the ARGV-element has the form "-f", where f is
- a valid short option, don't consider it an abbreviated form of
- a long option that starts with f. Otherwise there would be no
- way to give the -f short option.
-
- On the other hand, if there's a long option "fubar" and
- the ARGV-element is "-fu", do consider that an abbreviation of
- the long option, just like "--fu", and not "-f" with arg "u".
-
- This distinction seems to be the most useful approach. */
-
- if (longopts != NULL
- && (argv[optind][1] == '-'
- || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
- {
- char *nameend;
- const struct option *p;
- const struct option *pfound = NULL;
- int exact = 0;
- int ambig = 0;
- int indfound = -1;
- int option_index;
-
- for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
- /* Do nothing. */ ;
-
- /* Test all long options for either exact match
- or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name; p++, option_index++)
- if (!strncmp (p->name, nextchar, nameend - nextchar))
- {
- if ((unsigned int) (nameend - nextchar)
- == (unsigned int) strlen (p->name))
- {
- /* Exact match found. */
- pfound = p;
- indfound = option_index;
- exact = 1;
- break;
- }
- else if (pfound == NULL)
- {
- /* First nonexact match found. */
- pfound = p;
- indfound = option_index;
- }
- else if (long_only
- || pfound->has_arg != p->has_arg
- || pfound->flag != p->flag
- || pfound->val != p->val)
- /* Second or later nonexact match found. */
- ambig = 1;
- }
-
- if (ambig && !exact)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "%s: option `%s' is ambiguous\n",
- argv[0], argv[optind]) >= 0)
- {
-
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, "%s: option `%s' is ambiguous\n",
- argv[0], argv[optind]);
-#endif
- }
- nextchar += strlen (nextchar);
- optind++;
- optopt = 0;
- return '?';
- }
-
- if (pfound != NULL)
- {
- option_index = indfound;
- optind++;
- if (*nameend)
- {
- /* Don't test has_arg with >, because some C compilers don't
- allow it to be used on enums. */
- if (pfound->has_arg)
- optarg = nameend + 1;
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (argv[optind - 1][1] == '-')
- {
- /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "\
-%s: option `--%s' doesn't allow an argument\n",
- argv[0], pfound->name);
-#else
- fprintf (stderr, "\
-%s: option `--%s' doesn't allow an argument\n",
- argv[0], pfound->name);
-#endif
- }
- else
- {
- /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "\
-%s: option `%c%s' doesn't allow an argument\n",
- argv[0], argv[optind - 1][0],
- pfound->name);
-#else
- fprintf (stderr, "\
-%s: option `%c%s' doesn't allow an argument\n",
- argv[0], argv[optind - 1][0], pfound->name);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#endif
- }
-
- nextchar += strlen (nextchar);
-
- optopt = pfound->val;
- return '?';
- }
- }
- else if (pfound->has_arg == 1)
- {
- if (optind < argc)
- optarg = argv[optind++];
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "\
-%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- "%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]);
-#endif
- }
- nextchar += strlen (nextchar);
- optopt = pfound->val;
- return optstring[0] == ':' ? ':' : '?';
- }
- }
- nextchar += strlen (nextchar);
- if (longind != NULL)
- *longind = option_index;
- if (pfound->flag)
- {
- *(pfound->flag) = pfound->val;
- return 0;
- }
- return pfound->val;
- }
-
- /* Can't find it as a long option. If this is not getopt_long_only,
- or the option starts with '--' or is not a valid short
- option, then it's an error.
- Otherwise interpret it as a short option. */
- if (!long_only || argv[optind][1] == '-'
- || my_index (optstring, *nextchar) == NULL)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (argv[optind][1] == '-')
- {
- /* --option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "%s: unrecognized option `--%s'\n",
- argv[0], nextchar);
-#else
- fprintf (stderr, "%s: unrecognized option `--%s'\n",
- argv[0], nextchar);
-#endif
- }
- else
- {
- /* +option or -option */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "%s: unrecognized option `%c%s'\n",
- argv[0], argv[optind][0], nextchar);
-#else
- fprintf (stderr, "%s: unrecognized option `%c%s'\n",
- argv[0], argv[optind][0], nextchar);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#endif
- }
- nextchar = (char *) "";
- optind++;
- optopt = 0;
- return '?';
- }
- }
-
- /* Look at and handle the next short option-character. */
-
- {
- char c = *nextchar++;
- char *temp = my_index (optstring, c);
-
- /* Increment `optind' when we start to process its last character. */
- if (*nextchar == '\0')
- ++optind;
-
- if (temp == NULL || c == ':')
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
- int n;
-#endif
-
- if (posixly_correct)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "%s: illegal option -- %c\n",
- argv[0], c);
-#else
- fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
-#endif
- }
- else
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- n = __asprintf (&buf, "%s: invalid option -- %c\n",
- argv[0], c);
-#else
- fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c);
-#endif
- }
-
-#if defined _LIBC && defined USE_IN_LIBIO
- if (n >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#endif
- }
- optopt = c;
- return '?';
- }
- /* Convenience. Treat POSIX -W foo same as long option --foo */
- if (temp[0] == 'W' && temp[1] == ';')
- {
- char *nameend;
- const struct option *p;
- const struct option *pfound = NULL;
- int exact = 0;
- int ambig = 0;
- int indfound = 0;
- int option_index;
-
- /* This is an option that requires an argument. */
- if (*nextchar != '\0')
- {
- optarg = nextchar;
- /* If we end this ARGV-element by taking the rest as an arg,
- we must advance to the next element now. */
- optind++;
- }
- else if (optind == argc)
- {
- if (print_errors)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf,
- "%s: option requires an argument -- %c\n",
- argv[0], c) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, "%s: option requires an argument -- %c\n",
- argv[0], c);
-#endif
- }
- optopt = c;
- if (optstring[0] == ':')
- c = ':';
- else
- c = '?';
- return c;
- }
- else
- /* We already incremented `optind' once;
- increment it again when taking next ARGV-elt as argument. */
- optarg = argv[optind++];
-
- /* optarg is now the argument, see if it's in the
- table of longopts. */
-
- for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
- /* Do nothing. */ ;
-
- /* Test all long options for either exact match
- or abbreviated matches. */
- for (p = longopts, option_index = 0; p->name; p++, option_index++)
- if (!strncmp (p->name, nextchar, nameend - nextchar))
- {
- if ((unsigned int) (nameend - nextchar) == strlen (p->name))
- {
- /* Exact match found. */
- pfound = p;
- indfound = option_index;
- exact = 1;
- break;
- }
- else if (pfound == NULL)
- {
- /* First nonexact match found. */
- pfound = p;
- indfound = option_index;
- }
- else
- /* Second or later nonexact match found. */
- ambig = 1;
- }
- if (ambig && !exact)
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "%s: option `-W %s' is ambiguous\n",
- argv[0], argv[optind]) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, "%s: option `-W %s' is ambiguous\n",
- argv[0], argv[optind]);
-#endif
- }
- nextchar += strlen (nextchar);
- optind++;
- return '?';
- }
- if (pfound != NULL)
- {
- option_index = indfound;
- if (*nameend)
- {
- /* Don't test has_arg with >, because some C compilers don't
- allow it to be used on enums. */
- if (pfound->has_arg)
- optarg = nameend + 1;
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "\
-%s: option `-W %s' doesn't allow an argument\n",
- argv[0], pfound->name) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr, "\
-%s: option `-W %s' doesn't allow an argument\n",
- argv[0], pfound->name);
-#endif
- }
-
- nextchar += strlen (nextchar);
- return '?';
- }
- }
- else if (pfound->has_arg == 1)
- {
- if (optind < argc)
- optarg = argv[optind++];
- else
- {
- if (print_errors)
- {
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "\
-%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- "%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]);
-#endif
- }
- nextchar += strlen (nextchar);
- return optstring[0] == ':' ? ':' : '?';
- }
- }
- nextchar += strlen (nextchar);
- if (longind != NULL)
- *longind = option_index;
- if (pfound->flag)
- {
- *(pfound->flag) = pfound->val;
- return 0;
- }
- return pfound->val;
- }
- nextchar = NULL;
- return 'W'; /* Let the application handle it. */
- }
- if (temp[1] == ':')
- {
- if (temp[2] == ':')
- {
- /* This is an option that accepts an argument optionally. */
- if (*nextchar != '\0')
- {
- optarg = nextchar;
- optind++;
- }
- else
- optarg = NULL;
- nextchar = NULL;
- }
- else
- {
- /* This is an option that requires an argument. */
- if (*nextchar != '\0')
- {
- optarg = nextchar;
- /* If we end this ARGV-element by taking the rest as an arg,
- we must advance to the next element now. */
- optind++;
- }
- else if (optind == argc)
- {
- if (print_errors)
- {
- /* 1003.2 specifies the format of this message. */
-#if defined _LIBC && defined USE_IN_LIBIO
- char *buf;
-
- if (__asprintf (&buf, "\
-%s: option requires an argument -- %c\n",
- argv[0], c) >= 0)
- {
- if (_IO_fwide (stderr, 0) > 0)
- __fwprintf (stderr, L"%s", buf);
- else
- fputs (buf, stderr);
-
- free (buf);
- }
-#else
- fprintf (stderr,
- "%s: option requires an argument -- %c\n",
- argv[0], c);
-#endif
- }
- optopt = c;
- if (optstring[0] == ':')
- c = ':';
- else
- c = '?';
- }
- else
- /* We already incremented `optind' once;
- increment it again when taking next ARGV-elt as argument. */
- optarg = argv[optind++];
- nextchar = NULL;
- }
- }
- return c;
- }
-}
-
-int
-getopt (argc, argv, optstring)
- int argc;
- char *const *argv;
- const char *optstring;
-{
- return _getopt_internal (argc, argv, optstring,
- (const struct option *) 0,
- (int *) 0,
- 0);
-}
-
-#endif /* Not ELIDE_CODE. */
-
-#ifdef TEST
-
-/* Compile with -DTEST to make an executable for use in testing
- the above definition of `getopt'. */
-
-int
-main (argc, argv)
- int argc;
- char **argv;
-{
- int c;
- int digit_optind = 0;
-
- while (1)
- {
- int this_option_optind = optind ? optind : 1;
-
- c = getopt (argc, argv, "abc:d:0123456789");
- if (c == -1)
- break;
-
- switch (c)
- {
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (digit_optind != 0 && digit_optind != this_option_optind)
- printf ("digits occur in two different argv-elements.\n");
- digit_optind = this_option_optind;
- printf ("option %c\n", c);
- break;
-
- case 'a':
- printf ("option a\n");
- break;
-
- case 'b':
- printf ("option b\n");
- break;
-
- case 'c':
- printf ("option c with value `%s'\n", optarg);
- break;
-
- case '?':
- break;
-
- default:
- printf ("?? getopt returned character code 0%o ??\n", c);
- }
- }
-
- if (optind < argc)
- {
- printf ("non-option ARGV-elements: ");
- while (optind < argc)
- printf ("%s ", argv[optind++]);
- printf ("\n");
- }
-
- exit (0);
-}
-
-#endif /* TEST */
=====================================
src/getopt.h deleted
=====================================
@@ -1,181 +0,0 @@
-/* Declarations for getopt.
- Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef _GETOPT_H
-
-#ifndef __need_getopt
-# define _GETOPT_H 1
-#endif
-
-/* If __GNU_LIBRARY__ is not already defined, either we are being used
- standalone, or this is the first header included in the source file.
- If we are being used with glibc, we need to include <features.h>, but
- that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
- not defined, include <ctype.h>, which will pull in <features.h> for us
- if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
- doesn't flood the namespace with stuff the way some other headers do.) */
-#if !defined __GNU_LIBRARY__
-# include <ctype.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* For communication from `getopt' to the caller.
- When `getopt' finds an option that takes an argument,
- the argument value is returned here.
- Also, when `ordering' is RETURN_IN_ORDER,
- each non-option ARGV-element is returned here. */
-
-extern char *optarg;
-
-/* Index in ARGV of the next element to be scanned.
- This is used for communication to and from the caller
- and for communication between successive calls to `getopt'.
-
- On entry to `getopt', zero means this is the first call; initialize.
-
- When `getopt' returns -1, this is the index of the first of the
- non-option elements that the caller should itself scan.
-
- Otherwise, `optind' communicates from one call to the next
- how much of ARGV has been scanned so far. */
-
-extern int optind;
-
-/* Callers store zero here to inhibit the error message `getopt' prints
- for unrecognized options. */
-
-extern int opterr;
-
-/* Set to an option character which was unrecognized. */
-
-extern int optopt;
-
-#ifndef __need_getopt
-/* Describe the long-named options requested by the application.
- The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
- of `struct option' terminated by an element containing a name which is
- zero.
-
- The field `has_arg' is:
- no_argument (or 0) if the option does not take an argument,
- required_argument (or 1) if the option requires an argument,
- optional_argument (or 2) if the option takes an optional argument.
-
- If the field `flag' is not NULL, it points to a variable that is set
- to the value given in the field `val' when the option is found, but
- left unchanged if the option is not found.
-
- To have a long-named option do something other than set an `int' to
- a compiled-in constant, such as set a value from `optarg', set the
- option's `flag' field to zero and its `val' field to a nonzero
- value (the equivalent single-letter option character, if there is
- one). For long options that have a zero `flag' field, `getopt'
- returns the contents of the `val' field. */
-
-struct option
-{
-# if (defined __STDC__ && __STDC__) || defined __cplusplus
- const char *name;
-# else
- char *name;
-# endif
- /* has_arg can't be an enum because some compilers complain about
- type mismatches in all the code that assumes it is an int. */
- int has_arg;
- int *flag;
- int val;
-};
-
-/* Names for the values of the `has_arg' field of `struct option'. */
-
-# define no_argument 0
-# define required_argument 1
-# define optional_argument 2
-#endif /* need getopt */
-
-
-/* Get definitions and prototypes for functions to process the
- arguments in ARGV (ARGC of them, minus the program name) for
- options given in OPTS.
-
- Return the option character from OPTS just read. Return -1 when
- there are no more options. For unrecognized options, or options
- missing arguments, `optopt' is set to the option letter, and '?' is
- returned.
-
- The OPTS string is a list of characters which are recognized option
- letters, optionally followed by colons, specifying that that letter
- takes an argument, to be placed in `optarg'.
-
- If a letter in OPTS is followed by two colons, its argument is
- optional. This behavior is specific to the GNU `getopt'.
-
- The argument `--' causes premature termination of argument
- scanning, explicitly telling `getopt' that there are no more
- options.
-
- If OPTS begins with `--', then non-option arguments are treated as
- arguments to the option '\0'. This behavior is specific to the GNU
- `getopt'. */
-
-#if (defined __STDC__ && __STDC__) || defined __cplusplus
-# ifdef __GNU_LIBRARY__
-/* Many other libraries have conflicting prototypes for getopt, with
- differences in the consts, in stdlib.h. To avoid compilation
- errors, only prototype getopt for the GNU C library. */
-extern int getopt (int ___argc, char *const *___argv, const char *__shortopts);
-# else /* not __GNU_LIBRARY__ */
-extern int getopt ();
-# endif /* __GNU_LIBRARY__ */
-
-# ifndef __need_getopt
-extern int getopt_long (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind);
-extern int getopt_long_only (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind);
-
-/* Internal only. Users should not call this directly. */
-extern int _getopt_internal (int ___argc, char *const *___argv,
- const char *__shortopts,
- const struct option *__longopts, int *__longind,
- int __long_only);
-# endif
-#else /* not __STDC__ */
-extern int getopt ();
-# ifndef __need_getopt
-extern int getopt_long ();
-extern int getopt_long_only ();
-
-extern int _getopt_internal ();
-# endif
-#endif /* __STDC__ */
-
-#ifdef __cplusplus
-}
-#endif
-
-/* Make sure we later can get all the definitions and declarations. */
-#undef __need_getopt
-
-#endif /* getopt.h */
=====================================
src/getopt1.c deleted
=====================================
@@ -1,196 +0,0 @@
-/* getopt_long and getopt_long_only entry points for GNU getopt.
- Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#ifdef _LIBC
-# include <getopt.h>
-#else
-# include "getopt.h"
-#endif
-
-#if !defined __STDC__ || !__STDC__
-/* This is a separate conditional since some stdc systems
- reject `defined (const)'. */
-#ifndef const
-#define const
-#endif
-#endif
-
-#include <stdio.h>
-
-/* Comment out all this code if we are using the GNU C Library, and are not
- actually compiling the library itself. This code is part of the GNU C
- Library, but also included in many other GNU distributions. Compiling
- and linking in this code is a waste when using the GNU C library
- (especially if it is a shared library). Rather than having every GNU
- program understand `configure --with-gnu-libc' and omit the object files,
- it is simpler to just do this in the source for each such file. */
-
-#define GETOPT_INTERFACE_VERSION 2
-#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
-#include <gnu-versions.h>
-#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
-#define ELIDE_CODE
-#endif
-#endif
-
-#ifndef ELIDE_CODE
-
-
-/* This needs to come after some library #include
- to get __GNU_LIBRARY__ defined. */
-#ifdef __GNU_LIBRARY__
-#include <stdlib.h>
-#endif
-
-#ifndef NULL
-#define NULL 0
-#endif
-
-int
-getopt_long (argc, argv, options, long_options, opt_index)
- int argc;
- char *const *argv;
- const char *options;
- const struct option *long_options;
- int *opt_index;
-{
- return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
-}
-
-/* Like getopt_long, but '-' as well as '--' can indicate a long option.
- If an option that starts with '-' (not '--') doesn't match a long option,
- but does match a short option, it is parsed as a short option
- instead. */
-
-int
-getopt_long_only (argc, argv, options, long_options, opt_index)
- int argc;
- char *const *argv;
- const char *options;
- const struct option *long_options;
- int *opt_index;
-{
- return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
-}
-
-# ifdef _LIBC
-libc_hidden_def (getopt_long)
-libc_hidden_def (getopt_long_only)
-# endif
-
-#endif /* Not ELIDE_CODE. */
-
-#ifdef TEST
-
-#include <stdio.h>
-
-int
-main (argc, argv)
- int argc;
- char **argv;
-{
- int c;
- int digit_optind = 0;
-
- while (1)
- {
- int this_option_optind = optind ? optind : 1;
- int option_index = 0;
- static struct option long_options[] =
- {
- {"add", 1, 0, 0},
- {"append", 0, 0, 0},
- {"delete", 1, 0, 0},
- {"verbose", 0, 0, 0},
- {"create", 0, 0, 0},
- {"file", 1, 0, 0},
- {0, 0, 0, 0}
- };
-
- c = getopt_long (argc, argv, "abc:d:0123456789",
- long_options, &option_index);
- if (c == -1)
- break;
-
- switch (c)
- {
- case 0:
- printf ("option %s", long_options[option_index].name);
- if (optarg)
- printf (" with arg %s", optarg);
- printf ("\n");
- break;
-
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (digit_optind != 0 && digit_optind != this_option_optind)
- printf ("digits occur in two different argv-elements.\n");
- digit_optind = this_option_optind;
- printf ("option %c\n", c);
- break;
-
- case 'a':
- printf ("option a\n");
- break;
-
- case 'b':
- printf ("option b\n");
- break;
-
- case 'c':
- printf ("option c with value `%s'\n", optarg);
- break;
-
- case 'd':
- printf ("option d with value `%s'\n", optarg);
- break;
-
- case '?':
- break;
-
- default:
- printf ("?? getopt returned character code 0%o ??\n", c);
- }
- }
-
- if (optind < argc)
- {
- printf ("non-option ARGV-elements: ");
- while (optind < argc)
- printf ("%s ", argv[optind++]);
- printf ("\n");
- }
-
- exit (0);
-}
-
-#endif /* TEST */
=====================================
src/gmap.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gmap.c 222314 2020-04-05 17:18:05Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -324,7 +324,9 @@ static int min_matches;
bool multiple_sequences_p = false;
#endif
-static bool sharedp = true;
+static bool sharedp = false;
+static bool preload_shared_memory_p = false;
+static bool unload_shared_memory_p = false;
static bool expand_offsets_p = false;
#ifdef HAVE_MMAP
@@ -537,6 +539,9 @@ static struct option long_options[] = {
/* Compute options */
+ {"use-shared-memory", required_argument, 0, 0}, /* sharedp */
+ {"preload-shared-memory", no_argument, 0, 0}, /* preload_shared_memory_p */
+ {"unload-shared-memory", no_argument, 0, 0}, /* unload_shared_memory_p */
#ifdef HAVE_MMAP
{"batch", required_argument, 0, 'B'}, /* offsetsstrm_access, positions_access, genome_access */
#endif
@@ -5722,6 +5727,22 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"time")) {
timingp = true;
+ } else if (!strcmp(long_name,"use-shared-memory")) {
+ if (!strcmp(optarg,"1")) {
+ sharedp = true;
+ } else if (!strcmp(optarg,"0")) {
+ sharedp = false;
+ } else {
+ fprintf(stderr,"--use-shared-memory flag must be 0 or 1\n");
+ return 9;
+ }
+
+ } else if (!strcmp(long_name,"preload-shared-memory")) {
+ preload_shared_memory_p = true;
+
+ } else if (!strcmp(long_name,"unload-shared-memory")) {
+ unload_shared_memory_p = true;
+
} else if (!strcmp(long_name,"expand-offsets")) {
fprintf(stderr,"Note: --expand-offsets flag is no longer supported. With the latest algorithms, it doesn't improve speed much. Ignoring this flag");
@@ -6186,6 +6207,10 @@ parse_command_line (int argc, char *argv[], int optind) {
printtype = SPLICESITES;
} else if (!strcmp(optarg,"introns")) {
printtype = INTRONS;
+ } else if (!strcmp(optarg,"mask_introns")) {
+ printtype = MASK_INTRONS;
+ } else if (!strcmp(optarg,"mask_utr_introns")) {
+ printtype = MASK_UTR_INTRONS;
} else if (!strcmp(optarg,"samse")) {
printtype = SAM;
sam_paired_p = false;
@@ -6216,6 +6241,8 @@ parse_command_line (int argc, char *argv[], int optind) {
fprintf(stderr," psl\n");
fprintf(stderr," splicesites (6)\n");
fprintf(stderr," introns\n");
+ fprintf(stderr," mask_introns\n");
+ fprintf(stderr," mask_utr_introns\n");
fprintf(stderr," samse\n");
fprintf(stderr," sampe\n");
fprintf(stderr," bedpe\n");
@@ -6824,13 +6851,13 @@ main (int argc, char *argv[]) {
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+ multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
indexdb_rev = Indexdb_new_genome(&index1part_aa,&index1interval,
genomesubdir,fileroot,REV_FILESUFFIX,/*snps_root*/NULL,
&alphabet,&alphabet_size,required_alphabet,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,sharedp,
- multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+ multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
if (indexdb_fwd == NULL || indexdb_rev == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets or %s.%s*offsets.\n",
@@ -6889,7 +6916,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6898,7 +6925,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -6914,7 +6941,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6923,7 +6950,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6939,7 +6966,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6948,7 +6975,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -6959,7 +6986,7 @@ main (int argc, char *argv[]) {
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find offsets file %s.%s*offsets, needed for GSNAP\n",fileroot,IDX_FILESUFFIX);
exit(9);
}
@@ -6970,7 +6997,7 @@ main (int argc, char *argv[]) {
genomesubdir,fileroot,IDX_FILESUFFIX,/*snps_root*/NULL,
required_local1part,required_local1interval,
locoffsetsstrm_access,locpositions_access,
- sharedp,multiple_sequences_p,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,unload_shared_memory_p)) == NULL) {
/* Not using localdb currently, since Oligoindex_hr seems to be faster than Oligoindex_localdb */
fprintf(stderr,"Warning: Cannot find localdb files. GMAP will run on this older index, but there is a newer index format available\n");
@@ -7036,7 +7063,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metct",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metct index file. Need to run cmetindex first\n");
exit(9);
}
@@ -7044,7 +7071,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"metga",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find metga index file. Need to run cmetindex first\n");
exit(9);
}
@@ -7060,7 +7087,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -7068,7 +7095,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -7084,7 +7111,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2itc",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2itc index file. Need to run atoiindex first\n");
exit(9);
}
@@ -7092,7 +7119,7 @@ main (int argc, char *argv[]) {
modedir,fileroot,/*idx_filesuffix*/"a2iag",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false)) == NULL) {
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p)) == NULL) {
fprintf(stderr,"Cannot find a2iag index file. Need to run atoiindex first\n");
exit(9);
}
@@ -7102,7 +7129,7 @@ main (int argc, char *argv[]) {
snpsdir,fileroot,/*idx_filesuffix*/"ref",snps_root,
required_index1part,required_index1interval,
expand_offsets_p,offsetsstrm_access,positions_access,
- sharedp,multiple_sequences_p,/*preload_shared_memory_p*/false,/*unload_shared_memory_p*/false);
+ sharedp,multiple_sequences_p,preload_shared_memory_p,unload_shared_memory_p);
if (indexdb_fwd == NULL) {
fprintf(stderr,"Cannot find snps index file for %s in directory %s\n",snps_root,snpsdir);
exit(9);
@@ -7651,6 +7678,11 @@ Usage: gmap [OPTIONS...] <FASTA files...>, or\n\
");
#endif
+ fprintf(stdout,"\
+ --use-shared-memory=INT If 1, then allocated memory is shared among all processes on this node\n\
+ If 0 (default), then each process has private allocated memory\n\
+");
+
fprintf(stdout,"\
--nosplicing Turns off splicing (useful for aligning genomic sequences\n\
onto a genome)\n\
@@ -7793,6 +7825,8 @@ Output types\n\
fprintf(stdout,"\
-f, --format=INT Other format for output (also note the -A and -S options\n\
and other options listed under Output types):\n\
+ mask_introns,\n\
+ mask_utr_introns,\n\
psl_pro (or 0) = PSL format in protein coords,\n\
psl_nt (or 1) = PSL format in nucleotide coords,\n\
gff3_gene (or 2) = GFF3 gene format,\n\
@@ -7806,6 +7840,8 @@ Output types\n\
fprintf(stdout,"\
-f, --format=INT Other format for output (also note the -A and -S options\n\
and other options listed under Output types):\n\
+ mask_introns,\n\
+ mask_utr_introns,\n\
psl (or 1) = PSL (BLAT) format,\n\
gff3_gene (or 2) = GFF3 gene format,\n\
gff3_match_cdna (or 3) = GFF3 cDNA_match format,\n\
=====================================
src/gmapindex.c
=====================================
@@ -1,10 +1,12 @@
-static char rcsid[] = "$Id: gmapindex.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gmapindex.c 222388 2020-04-09 05:36:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <stdlib.h>
+#include <libgen.h> /* For basename */
+
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
@@ -91,10 +93,9 @@ typedef Tableuint_T Table_chrpos_T;
/* Program variables */
-typedef enum {NONE, AUXFILES, GENOME, CONCATENATE_GENOMES, UNSHUFFLE, COUNT,
+typedef enum {NONE, AUXFILES, GENOME, COMPRESS_GENOMES, CONCATENATE_GENOMES, UNSHUFFLE, COUNT,
OFFSETS, POSITIONS, REGIONDB, CONCATENATE_REGIONDBS} Action_T;
static Action_T action = NONE;
-static char *sourcedirs = ".";
static char *destdir = ".";
static char *fileroot = NULL;
static int compression_types = BITPACK64_COMPRESSION;
@@ -1338,7 +1339,7 @@ main (int argc, char *argv[]) {
char *typestring;
Univcoord_T *genomelengths, genomelength, totalnts;
char *chromosomefile, *iitfile, *positionsfile_high, *positionsfile_low, interval_char, region_interval_char;
- char *sourcedirs_copy, *sourcedir, **inputfiles, *outputfile;
+ char *source_genome, *source_dbroot, **inputfiles, *outputfile;
int nfiles;
Indexdb_filenames_T ifilenames;
@@ -1359,9 +1360,8 @@ main (int argc, char *argv[]) {
extern char *optarg;
char *string;
- while ((c = getopt(argc,argv,"F:D:d:z:k:q:j:ArlGCUNHOPQRWw:e:Ss:n:m9")) != -1) {
+ while ((c = getopt(argc,argv,"D:d:z:k:q:j:ArlGZCUNHOPQRWw:e:Ss:n:m9")) != -1) {
switch (c) {
- case 'F': sourcedirs = optarg; break;
case 'D': destdir = optarg; break;
case 'd': fileroot = optarg; break;
@@ -1395,6 +1395,7 @@ main (int argc, char *argv[]) {
case 'r': rawp = true; break;
case 'l': genome_lc_p = true; break;
case 'G': action = GENOME; break;
+ case 'Z': action = COMPRESS_GENOMES; break;
case 'C': action = CONCATENATE_GENOMES; break;
case 'U': action = UNSHUFFLE; break;
case 'N': action = COUNT; break;
@@ -1462,7 +1463,7 @@ main (int argc, char *argv[]) {
exit(9);
}
- if (action != UNSHUFFLE) {
+ if (action != COMPRESS_GENOMES && action != UNSHUFFLE) {
if (fileroot == NULL) {
fprintf(stderr,"Missing name of genome database. Must specify with -d flag.\n");
exit(9);
@@ -1644,9 +1645,17 @@ main (int argc, char *argv[]) {
Univ_IIT_free(&chromosome_iit);
Univ_IIT_free(&contig_iit);
+ } else if (action == COMPRESS_GENOMES) {
+
+ if (argc == 0) {
+ Compress_compress(/*inputfiles*/NULL,/*nfiles*/0,/*stdin_p*/true);
+ } else {
+ Compress_compress(/*inputfiles*/argv,/*nfiles*/argc,/*stdin_p*/false);
+ }
+
} else if (action == CONCATENATE_GENOMES) {
- /* Usage: gmapindex [-F <sourcedirs>] [-D <destdir>] -d <dbname> -C <input_genomes...>
- Requires <sourcedir>/<input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
+ /* Usage: gmapindex [-D <destdir>] -d <dbname> -C </path/to/input_genomes...>
+ Requires <input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
Creates <destdir>/<dbname>.genomecomp (horizontal format in blocks of 32).
Then the unshuffle command creates another version (vertical format in blocks of 128). */
@@ -1654,39 +1663,31 @@ main (int argc, char *argv[]) {
inputfiles = (char **) MALLOC(nfiles*sizeof(char *));
genomelengths = (Univcoord_T *) MALLOC(nfiles*sizeof(Univcoord_T));
- sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(sourcedirs));
for (i = 0; i < nfiles; i++) {
- strcpy(sourcedirs_copy,sourcedirs);
-
- if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
- chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
- }
- while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
- FREE(chromosomefile);
- if ((sourcedir = strtok(NULL,",")) != NULL) {
- chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
- }
- }
- if (sourcedir == NULL) {
- fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[i]);
+ source_genome = MALLOC((strlen(argv[i])+1)*sizeof(char));
+ strcpy(source_genome,argv[i]);
+ source_dbroot = basename(source_genome);
+
+ chromosomefile = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+ strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(chromosomefile,"%s/%s.chromosome.iit",argv[i],source_dbroot);
+ if ((chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+ fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[i]);
+ fprintf(stderr,"Expecting to find %s\n",chromosomefile);
exit(9);
} else {
FREE(chromosomefile);
}
- inputfiles[i] = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".genomecomp")+1,sizeof(char));
- sprintf(inputfiles[i],"%s/%s/%s.genomecomp",sourcedir,argv[i],argv[i]);
+ inputfiles[i] = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+ strlen(source_dbroot)+strlen(".genomecomp")+1,sizeof(char));
+ sprintf(inputfiles[i],"%s/%s.genomecomp",argv[i],source_dbroot);
genomelengths[i] = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
fprintf(stderr,"genomelength of %s is %llu bp\n",argv[i],genomelengths[i]);
Univ_IIT_free(&chromosome_iit);
+ FREE(source_genome);
}
- FREE(sourcedirs_copy);
outputfile = (char *) CALLOC(strlen(destdir)+strlen("/")+
strlen(fileroot)+strlen(".genomecomp")+1,sizeof(char));
@@ -1897,8 +1898,8 @@ main (int argc, char *argv[]) {
Univ_IIT_free(&chromosome_iit);
} else if (action == CONCATENATE_REGIONDBS) {
- /* Usage: gmapindex [-F <sourcedirs>] [-D <destdir>] -d <dbname> -R <input_genomes...>
- Requires <sourcedir>/<input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
+ /* Usage: gmapindex [-D <destdir>] -d <dbname> -R </path/to/input_genomes...>
+ Requires <input_genome>.chromosome.iit for each input genome on the command line to get genomelengths
Creates <destdir>/<dbname>.genomecomp (horizontal format in blocks of 32).
Then the unshuffle command creates another version (vertical format in blocks of 128). */
@@ -1906,38 +1907,29 @@ main (int argc, char *argv[]) {
inputfiles = (char **) MALLOC(nfiles*sizeof(char *));
genomelengths = (Univcoord_T *) MALLOC(nfiles*sizeof(Univcoord_T));
- sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(sourcedirs));
for (i = 0; i < nfiles; i++) {
- strcpy(sourcedirs_copy,sourcedirs);
-
- if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
- chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
- }
- while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
- FREE(chromosomefile);
- if ((sourcedir = strtok(NULL,",")) != NULL) {
- chromosomefile = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(chromosomefile,"%s/%s/%s.chromosome.iit",sourcedir,argv[i],argv[i]);
- }
- }
- if (sourcedir == NULL) {
- fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[i]);
+ source_genome = MALLOC((strlen(argv[i])+1)*sizeof(char));
+ strcpy(source_genome,argv[i]);
+ source_dbroot = basename(source_genome);
+
+ chromosomefile = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+ strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(chromosomefile,"%s/%s.chromosome.iit",argv[i],source_dbroot);
+ if ((chromosome_iit = Univ_IIT_read(chromosomefile,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+ fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[i]);
+ fprintf(stderr,"Expecting to find %s\n",chromosomefile);
exit(9);
} else {
FREE(chromosomefile);
}
- genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[i])+1)*sizeof(char));
- sprintf(genomesubdir,"%s/%s",sourcedir,argv[i]);
-
if ((rfilenames = Regiondb_get_filenames(®ion1part,®ion1interval,
- genomesubdir,argv[i],IDX_FILESUFFIX,/*snps_root*/NULL,
+ /*genomesubdir*/argv[i],/*fileroot*/source_dbroot,
+ IDX_FILESUFFIX,/*snps_root*/NULL,
required_region1part,required_region1interval)) == NULL) {
fprintf(stderr,"Could not find a matching regiondb file for %s\n",argv[i]);
exit(9);
+
} else {
if (required_region1part == 0) {
required_region1part = region1part;
@@ -1946,20 +1938,20 @@ main (int argc, char *argv[]) {
required_region1interval = region1interval;
}
- inputfiles[i] = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[i])+strlen("/")+
- strlen(argv[i])+strlen(".refXXXregiondb")+1,sizeof(char));
- sprintf(inputfiles[i],"%s/%s/%s.ref%02d%01dregiondb",
- sourcedir,argv[i],argv[i],required_region1part,required_region1interval);
+ inputfiles[i] = (char *) CALLOC(strlen(argv[i])+strlen("/")+
+ strlen(source_dbroot)+strlen(".refXXXregiondb")+1,sizeof(char));
+ sprintf(inputfiles[i],"%s/%s.ref%02d%01dregiondb",
+ argv[i],source_dbroot,required_region1part,required_region1interval);
genomelengths[i] = Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
fprintf(stderr,"genomelength of %s is %llu bp\n",argv[i],genomelengths[i]);
Univ_IIT_free(&chromosome_iit);
Regiondb_filenames_free(&rfilenames);
- FREE(genomesubdir);
}
+
+ FREE(source_genome);
}
- FREE(sourcedirs_copy);
outputfile = (char *) CALLOC(strlen(destdir)+strlen("/")+
strlen(fileroot)+strlen(".refXXXregiondb")+1,sizeof(char));
=====================================
src/gsnap.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: gsnap.c 222315 2020-04-05 17:18:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -281,7 +281,7 @@ static int trim_indel_score = -2; /* was -4 */
static bool multiple_sequences_p;
-static bool sharedp = true;
+static bool sharedp = false;
static bool preload_shared_memory_p = false;
static bool unload_shared_memory_p = false;
static bool expand_offsets_p = false;
@@ -3995,11 +3995,7 @@ main (int argc, char *argv[]) {
#endif
if (preload_shared_memory_p == true || unload_shared_memory_p == true) {
-#if 0
- /* To avoid time-consuming pre-load */
- sarray_access = USE_MMAP_ONLY;
- lcp_access = USE_MMAP_ONLY;
-#endif
+ /* No need for pre-load */
} else if (multiple_sequences_p == true) {
#if 0
@@ -4402,8 +4398,8 @@ is still designed to be fast.\n\
");
#endif
fprintf(stdout,"\
- --use-shared-memory=INT If 1 (default), then allocated memory is shared among all processes\n\
- on this node. If 0, then each process has private allocated memory\n\
+ --use-shared-memory=INT If 1, then allocated memory is shared among all processes on this node\n\
+ If 0 (default), then each process has private allocated memory\n\
--preload-shared-memory Load files indicated by --batch mode into shared memory for use by other\n\
GMAP/GSNAP processes on this node, and then exit. Ignore any input files.\n\
--unload-shared-memory Unload files indicated by --batch mode into shared memory, or allow them\n\
=====================================
src/iit-read-univ.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read-univ.c 218400 2019-02-19 00:55:54Z twu $";
+static char rcsid[] = "$Id: iit-read-univ.c 222390 2020-04-10 12:44:01Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1440,78 +1440,49 @@ read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
/* This function only assigns pointers. Subsequent accesses to
memory, other than char *, still need to be read correctly
by bigendian machines */
+/* Previously allowed read/write access, but we can assume read-only access */
#ifdef HAVE_MMAP
static bool
mmap_annotations (char *filename, T new, bool readonlyp) {
int remainder;
- if (readonlyp == true) {
- if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
+ assert(readonlyp == true);
+
+ if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
exit(9);
- }
+ }
- new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+ new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
/*randomp*/true);
- debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
- new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
- new->labelorder_length += (size_t) remainder;
+ debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
+ new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
+ new->labelorder_length += (size_t) remainder;
- new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
+ new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
/*randomp*/true);
- debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
- new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
- new->labelpointers_length += (size_t) remainder;
+ debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
+ new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
+ new->labelpointers_length += (size_t) remainder;
- new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+ new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
/*randomp*/true);
- debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
- new->labels = (char *) &(new->label_mmap[remainder]);
- new->label_length += (size_t) remainder;
-
- new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
- /*randomp*/true);
- debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
- new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
- new->annotpointers_length += (size_t) remainder;
+ debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
+ new->labels = (char *) &(new->label_mmap[remainder]);
+ new->label_length += (size_t) remainder;
+
+ new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
+ new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
+ new->annotpointers_length += (size_t) remainder;
+
+ new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
+ new->annotations = (char *) &(new->annot_mmap[remainder]);
+ new->annot_length += (size_t) remainder;
- new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
- /*randomp*/true);
- debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
- new->annotations = (char *) &(new->annot_mmap[remainder]);
- new->annot_length += (size_t) remainder;
-
- } else {
- if ((new->fd = open(filename,O_RDWR,0764)) < 0) {
- fprintf(stderr,"Error: can't open file %s with open for reading/writing\n",filename);
- exit(9);
- }
-
- new->labelorder_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
- /*randomp*/true);
- new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
- new->labelorder_length += (size_t) remainder;
-
- new->labelpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
- /*randomp*/true);
- new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
- new->labelpointers_length += (size_t) remainder;
-
- new->label_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->label_offset,new->label_length,
- /*randomp*/true);
- new->labels = (char *) &(new->label_mmap[remainder]);
- new->label_length += (size_t) remainder;
-
- new->annotpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
- /*randomp*/true);
- new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
- new->annotpointers_length += (size_t) remainder;
-
- new->annot_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annot_offset,new->annot_length,
- /*randomp*/true);
- new->annotations = (char *) &(new->annot_mmap[remainder]);
- new->annot_length += (size_t) remainder;
- }
if (new->labelorder == NULL || new->labelpointers == NULL || new->labels == NULL) {
fprintf(stderr,"Memory mapping failed in reading IIT file %s. Using slow file IO instead.\n",filename);
=====================================
src/iit-read.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit-read.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: iit-read.c 222390 2020-04-10 12:44:01Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -2350,132 +2350,83 @@ read_words_debug (size_t offset, size_t filesize, FILE *fp, T new) {
/* This function only assigns pointers. Subsequent accesses to
memory, other than char *, still need to be read correctly
by bigendian machines */
+/* Previously allowed read/write access, but we can assume read-only access */
#ifdef HAVE_MMAP
static bool
mmap_annotations (char *filename, T new, bool readonlyp) {
int remainder;
- if (readonlyp == true) {
- if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
- fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
- exit(9);
- }
+ assert(readonlyp == true);
- if (new->valuep == true) {
- new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
- /*randomp*/true);
- debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
- new->valueorder = (int *) &(new->valueorder_mmap[remainder]);
- new->valueorder_length += (size_t) remainder;
-
- new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
- /*randomp*/true);
- debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
- new->values = (double *) &(new->value_mmap[remainder]);
- new->value_length += (size_t) remainder;
- }
+ if ((new->fd = open(filename,O_RDONLY,0764)) < 0) {
+ fprintf(stderr,"Error: can't open file %s with open for reading\n",filename);
+ exit(9);
+ }
- new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+ if (new->valuep == true) {
+ new->valueorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->valueorder_offset,new->valueorder_length,
/*randomp*/true);
- debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
- new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
- new->labelorder_length += (size_t) remainder;
-
- new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
- /*randomp*/true);
- debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
-#ifdef HAVE_64_BIT
- if (new->label_pointers_8p == true) {
- new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
- new->labelpointers = (UINT4 *) NULL;
- } else {
- new->labelpointers8 = (UINT8 *) NULL;
- new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
- }
-#else
- new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
-#endif
- new->labelpointers_length += (size_t) remainder;
-
- new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+ debug(fprintf(stderr,"valueorder_mmap is %p\n",new->valueorder_mmap));
+ new->valueorder = (int *) &(new->valueorder_mmap[remainder]);
+ new->valueorder_length += (size_t) remainder;
+
+ new->value_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->value_offset,new->value_length,
/*randomp*/true);
- debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
- new->labels = (char *) &(new->label_mmap[remainder]);
- new->label_length += (size_t) remainder;
-
- new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
- /*randomp*/true);
- debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
+ debug(fprintf(stderr,"values_mmap is %p\n",new->value_mmap));
+ new->values = (double *) &(new->value_mmap[remainder]);
+ new->value_length += (size_t) remainder;
+ }
+
+ new->labelorder_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"labelorder_mmap is %p\n",new->labelorder_mmap));
+ new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
+ new->labelorder_length += (size_t) remainder;
+
+ new->labelpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"labelpointers_mmap is %p\n",new->labelpointers_mmap));
#ifdef HAVE_64_BIT
- if (new->annot_pointers_8p == true) {
- new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
- new->annotpointers = (UINT4 *) NULL;
- } else {
- new->annotpointers8 = (UINT8 *) NULL;
- new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
- }
-#else
- new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
-#endif
- new->annotpointers_length += (size_t) remainder;
-
- new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
- /*randomp*/true);
- debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
- new->annotations = (char *) &(new->annot_mmap[remainder]);
- new->annot_length += (size_t) remainder;
-
+ if (new->label_pointers_8p == true) {
+ new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
+ new->labelpointers = (UINT4 *) NULL;
} else {
- if ((new->fd = open(filename,O_RDWR,0764)) < 0) {
- fprintf(stderr,"Error: can't open file %s with open for reading/writing\n",filename);
- exit(9);
- }
-
- new->labelorder_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelorder_offset,new->labelorder_length,
- /*randomp*/true);
- new->labelorder = (int *) &(new->labelorder_mmap[remainder]);
- new->labelorder_length += (size_t) remainder;
-
- new->labelpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->labelpointers_offset,new->labelpointers_length,
- /*randomp*/true);
-#ifdef HAVE_64_BIT
- if (new->label_pointers_8p == true) {
- new->labelpointers8 = (UINT8 *) &(new->labelpointers_mmap[remainder]);
- new->labelpointers = (UINT4 *) NULL;
- } else {
- new->labelpointers8 = (UINT8 *) NULL;
- new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
- }
-#else
+ new->labelpointers8 = (UINT8 *) NULL;
new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
+ }
+#else
+ new->labelpointers = (UINT4 *) &(new->labelpointers_mmap[remainder]);
#endif
- new->labelpointers_length += (size_t) remainder;
-
- new->label_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->label_offset,new->label_length,
- /*randomp*/true);
- new->labels = (char *) &(new->label_mmap[remainder]);
- new->label_length += (size_t) remainder;
-
- new->annotpointers_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
- /*randomp*/true);
+ new->labelpointers_length += (size_t) remainder;
+
+ new->label_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->label_offset,new->label_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"labels_mmap is %p\n",new->label_mmap));
+ new->labels = (char *) &(new->label_mmap[remainder]);
+ new->label_length += (size_t) remainder;
+
+ new->annotpointers_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annotpointers_offset,new->annotpointers_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"annotpointers_mmap is %p\n",new->annotpointers_mmap));
#ifdef HAVE_64_BIT
- if (new->annot_pointers_8p == true) {
- new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
- new->annotpointers = (UINT4 *) NULL;
- } else {
- new->annotpointers8 = (UINT8 *) NULL;
- new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
- }
-#else
+ if (new->annot_pointers_8p == true) {
+ new->annotpointers8 = (UINT8 *) &(new->annotpointers_mmap[remainder]);
+ new->annotpointers = (UINT4 *) NULL;
+ } else {
+ new->annotpointers8 = (UINT8 *) NULL;
new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
+ }
+#else
+ new->annotpointers = (UINT4 *) &(new->annotpointers_mmap[remainder]);
#endif
- new->annotpointers_length += (size_t) remainder;
+ new->annotpointers_length += (size_t) remainder;
+
+ new->annot_mmap = (char *) Access_mmap_offset(&remainder,new->fd,new->annot_offset,new->annot_length,
+ /*randomp*/true);
+ debug(fprintf(stderr,"annots_mmap is %p\n",new->annot_mmap));
+ new->annotations = (char *) &(new->annot_mmap[remainder]);
+ new->annot_length += (size_t) remainder;
- new->annot_mmap = (char *) Access_mmap_offset_rw(&remainder,new->fd,new->annot_offset,new->annot_length,
- /*randomp*/true);
- new->annotations = (char *) &(new->annot_mmap[remainder]);
- new->annot_length += (size_t) remainder;
- }
#ifdef HAVE_64_BIT
if (new->label_pointers_8p == true) {
=====================================
src/iit_get.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_get.c 221574 2020-01-30 21:17:50Z twu $";
+static char rcsid[] = "$Id: iit_get.c 222203 2020-03-23 21:59:48Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -539,7 +539,8 @@ print_interval (Chrpos_T *lastcoord, long int total,
char *divstring, Chrpos_T coordstart, Chrpos_T coordend,
int index, IIT_T iit, int ndivs, int fieldint) {
Interval_T interval;
- char *label, *annotation, *restofheader;
+ char *label, *annotation, *restofheader, *p, *q;
+ Chrpos_T pos;
bool allocp;
debug(printf("Entered print_interval with coordstart %u and coordend %u\n",coordstart,coordend));
@@ -595,8 +596,30 @@ print_interval (Chrpos_T *lastcoord, long int total,
printf("%s\n",restofheader);
if (coordstart == 0 && coordend == 0) {
printf("%s",annotation);
+
} else {
- printf("%.*s\n",coordend-coordstart+1,&(annotation[coordstart-1]));
+ /* printf("%.*s\n",coordend-coordstart+1,&(annotation[coordstart-1])); */
+
+ p = annotation;
+ pos = 1;
+ while (pos < coordstart) {
+ if (*p++ != '\n') {
+ pos++;
+ }
+ }
+
+ while (*p == '\n') {
+ p++;
+ }
+ q = p;
+
+ while (pos < coordend) {
+ if (*++q != '\n') {
+ pos++;
+ }
+ }
+
+ printf("%.*s\n",(int) (q-p+1),p);
}
if (allocp == true) {
FREE(restofheader);
=====================================
src/iit_store.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: iit_store.c 221945 2020-02-21 03:16:45Z twu $";
+static char rcsid[] = "$Id: iit_store.c 222272 2020-03-27 14:29:22Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -60,6 +60,7 @@ static bool univ_format_p = false; /* IIT_write_univ used for chromosome.iit fil
static bool gff3_format_p = false;
static char *labelid = "ID";
static bool fieldsp = false;
+static bool acc_only_p = false;
static char iit_version = 0;
static Sorttype_T divsort = CHROM_SORT;
@@ -70,6 +71,7 @@ static struct option long_options[] = {
/* Input options */
{"output", required_argument, 0, 'o'}, /* outputfile */
{"univformat", no_argument, 0, '1'}, /* univ_format_p */
+ {"accession-only", no_argument, 0, 0}, /* acc_only_p */
{"fields", no_argument, 0, 'F'}, /* fieldsp */
{"gff", no_argument, 0, 'G'}, /* gff3_format_p */
{"label", required_argument, 0, 'l'}, /* labelid */
@@ -107,6 +109,7 @@ Options\n\
-o, --output=STRING Name of output iit file\n\
-1, --oldformat Old format for intervals:\n\
<start> <optional end> <optional div> <optional type>\n\
+ --accession-only Process only the first word of each FASTA header, and ignore the rest of the line\n\
-F, --fields Annotation consists of separate fields\n\
-G, --gff Parse input file in gff3 format\n\
-l, --label=STRING For gff input, the feature attribute to use (default is ID)\n\
@@ -141,7 +144,9 @@ and a given type, numeric value, or both is optional. A numeric value\n\
allows intervals to be searched by a range of values using iit_get.\n\
If the interval is omitted, then it is assumed to be label:1..n,\n\
where n is the length of the sequence. This allows for storage and retrieval\n\
-of sequences in FASTA files.\n\
+of sequences in FASTA files. If you specify --accession-only, then it is\n\
+assumed that you are not providing intervals, and all information in the FASTA\n\
+header other than the first word (accession) will be ignored.\n\
\n\
Intervals may have directions. To indicate a forward direction,\n\
the start coordinate should be less than the end coordinate.\n\
@@ -297,7 +302,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
strcpy(*label,acc);
}
- if (sscanf(header,">%s %s",acc,query) < 2) {
+ if (acc_only_p == true || sscanf(header,">%s %s",acc,query) < 2) {
/* Treat query as acc:1..n */
divstring = (char *) MALLOC((*labellength+1)*sizeof(char));
strcpy(divstring,acc);
@@ -316,7 +321,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
coords = (char *) NULL;
} else if (!index(query,':')) {
- debug(printf("Query %s has no div\n",query));
+ debug(printf("Query %s has no div.\n",query));
divstring = (char *) CALLOC(1,sizeof(char));
divstring[0] = '\0';
coords = query;
@@ -357,7 +362,8 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
debug(printf(" and coords %s as a range starting at %llu and ending at %llu\n",
coords,(unsigned long long) *start,(unsigned long long) *end));
} else {
- fprintf(stderr,"Error parsing %s:%s. Expecting coords (as <div>:<number>..<number>)\n",query,coords);
+ fprintf(stderr,"Error parsing %s:%s. Expecting coords (as <div>:<number>..<number>). Or specify --accession-only to ignore the second field\n",
+ query,coords);
fprintf(stderr,"Problematic line was: %s\n",header);
exit(9);
}
@@ -371,7 +377,7 @@ scan_header_div (int *labellength, bool *seenp, List_T *divlist, List_T *typelis
*value = atof(valueptr);
}
- if (sscanf(header,">%s %s %s",acc,query,tag) < 3) {
+ if (acc_only_p == true || sscanf(header,">%s %s %s",acc,query,tag) < 3) {
*type = 0;
*restofheader = (char *) NULL;
@@ -946,10 +952,23 @@ main (int argc, char *argv[]) {
extern int optind;
extern char *optarg;
int long_option_index = 0;
+ const char *long_name;
while ((opt = getopt_long(argc,argv,"o:1FGl:v:s:",
long_options,&long_option_index)) != -1) {
switch (opt) {
+
+ case 0:
+ long_name = long_options[long_option_index].name;
+ if (!strcmp(long_name,"accession-only")) {
+ acc_only_p = true;
+ } else {
+ /* Shouldn't reach here */
+ fprintf(stderr,"Don't recognize option %s. For usage, run 'iit_store --help'",long_name);
+ return 9;
+ }
+ break;
+
case 'o': outputfile = optarg; break;
case '1': univ_format_p = true; break;
case 'F': fieldsp = true; break;
=====================================
src/indexdb-cat.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
+static char rcsid[] = "$Id: indexdb-cat.c 222388 2020-04-09 05:36:24Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -19,6 +19,7 @@ static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
#include <stddef.h>
#include <stdlib.h>
#include <string.h> /* For memset */
+#include <libgen.h> /* For basename */
#include "mem.h"
#include "fopen.h"
@@ -59,9 +60,7 @@ static char rcsid[] = "$Id: indexdb-cat.c 221945 2020-02-21 03:16:45Z twu $";
-static char *user_sourcedirs = NULL;
static char *user_destdir = NULL;
-static char *sourcedirs = ".";
static char *destdir = ".";
static char *fileroot = NULL;
static int compression_type;
@@ -76,7 +75,6 @@ static char *snps_root = NULL;
static struct option long_options[] = {
/* Input options */
- {"sourcedirs", required_argument, 0, 'F'}, /* user_sourcedirs */
{"destdir", required_argument, 0, 'D'}, /* user_destdir */
{"kmer", required_argument, 0, 'k'}, /* required_index1part */
{"sampling", required_argument, 0, 'q'}, /* required_index1interval */
@@ -93,7 +91,7 @@ static struct option long_options[] = {
static void
print_program_version () {
fprintf(stdout,"\n");
- fprintf(stdout,"CMETINDEX: Builds GMAP index files for methylated genomes\n");
+ fprintf(stdout,"INDEXDB_CAT: Concatenates GMAP index files\n");
fprintf(stdout,"Part of GMAP package, version %s\n",PACKAGE_VERSION);
fprintf(stdout,"Default gmap directory: %s\n",GMAPDB);
fprintf(stdout,"Thomas D. Wu, Genentech, Inc.\n");
@@ -626,7 +624,7 @@ merge_indexdbs_huge_uint8 (char *new_pages_filename, char *new_pointers_filename
int
main (int argc, char *argv[]) {
Indexdb_filenames_T ifilenames;
- char *sourcedirs_copy, *sourcedir, *filename, *genomesubdir;
+ char *source_genome, *source_dbroot, *filename, *genomesubdir;
char *new_pages_filename = NULL, *new_pointers_filename = NULL, *new_offsets_filename = NULL,
*new_positions_high_filename = NULL, *new_positions_filename = NULL;
char interval_char;
@@ -665,7 +663,6 @@ main (int argc, char *argv[]) {
}
break;
- case 'F': user_sourcedirs = optarg; break;
case 'D': user_destdir = optarg; break;
case 'd': fileroot = optarg; break;
case 'k': required_index1part = atoi(optarg); break;
@@ -688,51 +685,35 @@ main (int argc, char *argv[]) {
}
destdir = Datadir_find_genomedir(user_destdir);
- if (user_sourcedirs == NULL) {
- sourcedirs = destdir;
- } else {
- sourcedirs = user_sourcedirs;
- }
-
ngenomes = argc;
fprintf(stderr,"Concatenating %d genomes\n",ngenomes);
-
/* Chromosome IIT files. Need to determine coord_values_8p and genomelength */
genome_8p = (bool *) MALLOC(ngenomes*sizeof(bool));
genome_univcoord = (Univcoord_T *) MALLOC(ngenomes*sizeof(Univcoord_T));
total_genomelength = 0;
- sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(char));
for (genomei = 0; genomei < ngenomes; genomei++) {
genome_univcoord[genomei] = total_genomelength;
-
- strcpy(sourcedirs_copy,sourcedirs);
- if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
- filename = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[genomei])+strlen("/")+
- strlen(argv[genomei])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(filename,"%s/%s/%s.chromosome.iit",sourcedir,argv[genomei],argv[genomei]);
- }
- while (sourcedir != NULL && (chromosome_iit = Univ_IIT_read(filename,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
- FREE(filename);
- if ((sourcedir = strtok(NULL,",")) != NULL) {
- filename = (char *) CALLOC(strlen(sourcedir)+strlen("/")+ strlen(argv[genomei])+strlen("/")+
- strlen(argv[genomei])+strlen(".chromosome.iit")+1,sizeof(char));
- sprintf(filename,"%s/%s/%s.chromosome.iit",sourcedir,argv[genomei],argv[genomei]);
- }
- }
-
- if (sourcedir == NULL) {
- fprintf(stderr,"Could not find chromosome IIT file for %s in any of the given sourcedirs\n",argv[genomei]);
+ source_genome = MALLOC((strlen(argv[genomei])+1)*sizeof(char));
+ strcpy(source_genome,argv[genomei]);
+ source_dbroot = basename(source_genome);
+
+ filename = (char *) CALLOC(strlen(argv[genomei])+strlen("/")+
+ strlen(source_dbroot)+strlen(".chromosome.iit")+1,sizeof(char));
+ sprintf(filename,"%s/%s.chromosome.iit",argv[genomei],source_dbroot);
+ if ((chromosome_iit = Univ_IIT_read(filename,/*readonlyp*/true,/*add_iit_p*/false)) == NULL) {
+ fprintf(stderr,"Could not find chromosome IIT file for the input genome %s\n",argv[genomei]);
+ fprintf(stderr,"Expecting to find %s\n",filename);
exit(9);
} else {
FREE(filename);
+ FREE(source_genome);
genome_8p[genomei] = Univ_IIT_coord_values_8p(chromosome_iit);
total_genomelength += Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias_p*/true);
Univ_IIT_free(&chromosome_iit);
}
}
- FREE(sourcedirs_copy);
#ifdef HAVE_64_BIT
if (total_genomelength > 4294967295) {
@@ -746,28 +727,18 @@ main (int argc, char *argv[]) {
indexdbs = (Indexdb_T *) MALLOC(ngenomes*sizeof(Indexdb_T));
- sourcedirs_copy = (char *) MALLOC((strlen(sourcedirs)+1)*sizeof(char));
for (genomei = 0; genomei < ngenomes; genomei++) {
fprintf(stderr,"Reading genome %s\n",argv[genomei]);
-
- strcpy(sourcedirs_copy,sourcedirs);
- if ((sourcedir = strtok(sourcedirs_copy,",")) != NULL) {
- genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[genomei])+1)*sizeof(char));
- sprintf(genomesubdir,"%s/%s",sourcedir,argv[genomei]);
- }
- while (sourcedir != NULL && (ifilenames = Indexdb_get_filenames(&compression_type,&index1part,&index1interval,
- genomesubdir,argv[genomei],IDX_FILESUFFIX,snps_root,
- required_index1part,required_index1interval,
- /*offsets_only_p*/false)) == NULL) {
- FREE(genomesubdir);
- if ((sourcedir = strtok(NULL,",")) != NULL) {
- genomesubdir = (char *) MALLOC((strlen(sourcedir)+strlen("/")+strlen(argv[genomei])+1)*sizeof(char));
- sprintf(genomesubdir,"%s/%s",sourcedir,argv[genomei]);
- }
- }
-
- if (sourcedir == NULL) {
- fprintf(stderr,"Could not find indexdb file for %s in any of the given sourcedirs\n",argv[genomei]);
+ source_genome = MALLOC((strlen(argv[genomei])+1)*sizeof(char));
+ strcpy(source_genome,argv[genomei]);
+ source_dbroot = basename(source_genome);
+
+ if ((ifilenames = Indexdb_get_filenames(&compression_type,&index1part,&index1interval,
+ /*genomesubdir*/source_genome,/*fileroot*/source_dbroot,
+ IDX_FILESUFFIX,snps_root,required_index1part,required_index1interval,
+ /*offsets_only_p*/false)) == NULL) {
+ fprintf(stderr,"Could not find indexdb file for the input genome %s\n",argv[genomei]);
+ exit(9);
} else {
if (required_index1part == 0) {
required_index1part = index1part;
@@ -776,21 +747,21 @@ main (int argc, char *argv[]) {
required_index1interval = index1interval;
}
if ((indexdbs[genomei] = Indexdb_new_genome(&index1part,&index1interval,
- genomesubdir,argv[genomei],IDX_FILESUFFIX,/*snps_root*/NULL,
+ /*genomesubdir*/source_genome,/*fileroot*/source_dbroot,
+ IDX_FILESUFFIX,/*snps_root*/NULL,
required_index1part,required_index1interval,
/*expand_offsets_p*/false,/*offsetsstrm_access*/USE_MMAP_ONLY,
/*positions_access*/USE_MMAP_ONLY,/*sharedp*/false,
/*multiple_sequences_p*/false,/*preload_shared_memory_p*/false,
/*unload_shared_memory_p*/false)) == NULL) {
- fprintf(stderr,"Cannot open indexdb, kmer %d, interval %d for %s\n",
+ fprintf(stderr,"Could not open indexdb, kmer %d, interval %d for %s\n",
required_index1part,required_index1interval,argv[genomei]);
exit(9);
}
Indexdb_filenames_free(&ifilenames);
- FREE(genomesubdir);
+ FREE(source_genome);
}
}
- FREE(sourcedirs_copy);
Indexdb_setup(index1part);
@@ -926,15 +897,13 @@ main (int argc, char *argv[]) {
static void
print_program_usage () {
fprintf(stdout,"\
-Usage: indexdb_cat [OPTIONS...] -d <genome>\n\
+Usage: indexdb_cat [OPTIONS...] -d </path/to/genome>\n\
\n\
");
/* Input options */
fprintf(stdout,"Options (must include -d)\n");
fprintf(stdout,"\
- -F, --sourcedirs=directories Directory or directories where to read input index files\n\
- May specify multiple directories by separating them with a comma\n\
-D, --destdir=directory Directory where to write cmet index files (default is\n\
value of -F, if provided; otherwise the value of the\n\
GMAP genome directory specified at compile time)\n\
=====================================
src/output.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: output.c 219218 2019-05-12 22:25:48Z twu $";
+static char rcsid[] = "$Id: output.c 222194 2020-03-23 13:44:44Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -912,6 +912,20 @@ Output_filestring_fromresult (Filestring_T *fp_failedinput, Result_T result, Req
FPRINTF(fp,"</path>\n");
}
+ } else if (printtype == MASK_INTRONS) {
+ /* Print only best path */
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ Pair_print_mask_introns(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+ Stage3_chrlength(stage3array[0]),wraplength,/*include_utr_p*/false);
+
+ } else if (printtype == MASK_UTR_INTRONS) {
+ /* Print only best path */
+ PUTC('>',fp);
+ Sequence_print_header(fp,headerseq,checksump);
+ Pair_print_mask_introns(fp,Stage3_pairarray(stage3array[0]),Stage3_npairs(stage3array[0]),
+ Stage3_chrlength(stage3array[0]),wraplength,/*include_utr_p*/true);
+
} else if (printtype == CDNA) {
for (pathnum = 1; pathnum <= effective_maxpaths; pathnum++) {
PUTC('>',fp);
=====================================
src/output.h
=====================================
@@ -1,11 +1,11 @@
-/* $Id: output.h 218147 2019-01-16 21:28:41Z twu $ */
+/* $Id: output.h 222194 2020-03-23 13:44:44Z twu $ */
#ifndef OUTPUT_INCLUDED
#define OUTPUT_INCLUDED
typedef enum {STD_OUTPUT, SAM_OUTPUT, M8_OUTPUT} Outputtype_T;
typedef enum {SIMPLE, SUMMARY, ALIGNMENT, COMPRESSED, CONTINUOUS, CONTINUOUS_BY_EXON,
- EXONS_CDNA, EXONS_GENOMIC, CDNA, PROTEIN_GENOMIC,
+ EXONS_CDNA, EXONS_GENOMIC, MASK_INTRONS, MASK_UTR_INTRONS, CDNA, PROTEIN_GENOMIC,
PSL_NT, PSL_PRO, GFF3_GENE, GFF3_MATCH_CDNA, GFF3_MATCH_EST, BEDPE,
SAM, COORDS, SPLICESITES, INTRONS, MAP_RANGES, MAP_EXONS} Printtype_T;
=====================================
src/pair.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 221673 2020-02-11 16:17:58Z twu $";
+static char rcsid[] = "$Id: pair.c 222194 2020-03-23 13:44:44Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -9519,6 +9519,86 @@ Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accessio
}
+static int
+print_Ns (Filestring_T fp, int column, int n, int wraplength) {
+ int i;
+
+ for (i = 0; i < n; i++) {
+ PUTC('N',fp);
+ if (++column % wraplength == 0) {
+ PUTC('\n',fp);
+ column = 0;
+ }
+ }
+
+ return column;
+}
+
+
+void
+Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs,
+ Chrpos_T chrlength, int wraplength, bool include_utr_p) {
+ int exoni = 0, column = 0, i;
+ bool in_exon = false;
+ struct T *ptr = pairs, *this = NULL;
+ Chrpos_T exon_genomestart = 0, exon_genomeend;
+ Chrpos_T last_genomepos = (Chrpos_T) -1;
+
+ assert(pairs != NULL);
+ if (include_utr_p == true) {
+ column = print_Ns(fp,column,pairs->genomepos,wraplength);
+ }
+
+ for (i = 0; i < npairs; i++) {
+ /* prev = this; */
+ this = ptr++;
+
+ if (this->gapp) {
+ if (in_exon == true) {
+ /* Beginning of gap */
+ exon_genomeend = last_genomepos + ONEBASEDP;
+ in_exon = false;
+ }
+ } else if (this->comp == INTRONGAP_COMP) {
+ /* Do nothing */
+ } else {
+ /* Remaining possibilities are MATCH_COMP, DYNPROG_MATCH_COMP, AMBIGUOUS_COMP, INDEL_COMP,
+ SHORTGAP_COMP, or MISMATCH_COMP */
+ if (in_exon == false) {
+ exoni++;
+ if (exoni > 1) {
+ exon_genomestart = this->genomepos + ONEBASEDP;
+ column = print_Ns(fp,column,exon_genomestart - exon_genomeend - 1,wraplength);
+ }
+
+ in_exon = true;
+ }
+ if (this->genome != ' ') {
+ PUTC(this->genome,fp);
+ if (++column % wraplength == 0) {
+ PUTC('\n',fp);
+ column = 0;
+ }
+ }
+ }
+
+ if (this->genome != ' ') {
+ last_genomepos = this->genomepos;
+ }
+ }
+
+ if (include_utr_p == true) {
+ column = print_Ns(fp,column,chrlength - last_genomepos - 1,wraplength);
+ }
+
+ if (column != 0) {
+ PUTC('\n',fp);
+ }
+
+ return;
+}
+
+
#if 0
/* goal_start < goal_end */
Chrpos_T
=====================================
src/pair.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: pair.h 221628 2020-02-04 22:28:32Z twu $ */
+/* $Id: pair.h 222194 2020-03-23 13:44:44Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -279,6 +279,10 @@ Pair_print_splicesites (Filestring_T fp, struct T *pairs, int npairs, char *acce
extern void
Pair_print_introns (Filestring_T fp, struct T *pairs, int npairs, char *accession,
int nexons, Chrnum_T chrnum, Univ_IIT_T chromosome_iit);
+extern void
+Pair_print_mask_introns (Filestring_T fp, struct T *pairs, int npairs,
+ Chrpos_T chrlength, int wraplength, bool include_utr_p);
+
extern int
Pair_nmatches_posttrim (int *max_match_length, List_T pairs, int pos5, int pos3);
=====================================
src/regiondb-write.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: regiondb-write.c 222139 2020-03-13 00:15:01Z twu $";
+static char rcsid[] = "$Id: regiondb-write.c 222312 2020-04-05 16:57:52Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -559,12 +559,13 @@ Region_remainder (Region_T current, Region_T buffer, int shift, size_t offsets_s
}
+/* A region has offsets_size (4^6 = 4096) offsets, followed by REGION_LENGTH positions */
void
Regiondb_cat (FILE *out, char **files, Univcoord_T *genomelengths, int nfiles, Width_T region1part) {
FILE *fp;
char *file;
size_t offsets_size = 0;
- int current_pos, bufferlen;
+ int current_pos = 0, bufferlen;
Region_T current, buffer, temp;
#ifdef DEBUG1
int k;
=====================================
src/stage3hr.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3hr.c 220326 2019-09-12 20:40:15Z twu $";
+static char rcsid[] = "$Id: stage3hr.c 222150 2020-03-13 22:28:19Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3636,6 +3636,9 @@ Stage3end_new_terminal (int *found_score_overall, int *found_score_within_trims,
/* There is no junctions_HtoL field */
/* Do not use junctions after this */
+ new->querystart_chrbound = Substring_querystart_chrbound(substring);
+ new->queryend_chrbound = Substring_queryend_chrbound(substring);
+
#if 0
/* No need to reverse for a single substring */
if (gplusp == true) {
@@ -3847,6 +3850,8 @@ Stage3end_new_precomputed (int *found_score_overall, int *found_score_within_tri
substring1 = (Substring_T) List_head(new->substrings_1toN);
substringN = (Substring_T) List_head(new->substrings_Nto1);
+ new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+ new->queryend_chrbound = Substring_queryend_chrbound(substringN);
genomicstart = Substring_genomicstart(substring1);
genomicend = Substring_genomicend(substringN); /* DOESN'T WORK FOR AMBIGUOUS */
@@ -4666,6 +4671,8 @@ Stage3end_new_substrings (int *found_score_overall, int *found_score_within_trim
substring1 = (Substring_T) List_head(new->substrings_1toN);
substringN = (Substring_T) List_head(new->substrings_Nto1);
+ new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+ new->queryend_chrbound = Substring_queryend_chrbound(substringN);
genomicstart = Substring_genomicstart(substring1);
genomicend = Substring_genomicend(substringN); /* DOESN'T WORK FOR AMBIGUOUS */
@@ -4981,6 +4988,9 @@ Stage3end_new_substitution (int *found_score_overall, int *found_score_within_tr
new->junctions_1toN = (List_T) NULL;
new->junctions_Nto1 = (List_T) NULL;
+ new->querystart_chrbound = Substring_querystart_chrbound(substring);
+ new->queryend_chrbound = Substring_queryend_chrbound(substring);
+
new->transcripts = (List_T) NULL;
new->transcripts_other = (List_T) NULL;
@@ -5249,6 +5259,10 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
assert(Substring_querystart(List_head(new->substrings_1toN)) < Substring_querystart(List_head(new->substrings_Nto1)));
/* Done assigning substrings */
+ substring1 = (Substring_T) List_head(new->substrings_1toN);
+ substringN = (Substring_T) List_head(new->substrings_Nto1);
+ new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+ new->queryend_chrbound = Substring_queryend_chrbound(substringN);
if (new->chrnum != 0) {
/* Ordinary splice. No need to distinguish effective_chrnum and other_chrnum */
@@ -5257,8 +5271,6 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
new->other_chrnum = 0;
/* Define coordinates as usual */
- substring1 = (Substring_T) List_head(new->substrings_1toN);
- substringN = (Substring_T) List_head(new->substrings_Nto1);
new->genomicstart = Substring_genomicstart(substring1);
new->genomicend = Substring_genomicend(substringN);
new->plusp = Substring_plusp(substring1);
@@ -5266,12 +5278,12 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
} else {
/* Translocation. Concordant substring is the inner one */
if (first_read_p == true) {
- substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
- substring_other = (Substring_T) List_head(new->substrings_1toN);
+ substring_for_concordance = substringN; /* (Substring_T) List_head(new->substrings_Nto1); */
+ substring_other = substring1; /* (Substring_T) List_head(new->substrings_1toN); */
debug0(printf("Since first read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
} else {
- substring_for_concordance = (Substring_T) List_head(new->substrings_1toN);
- substring_other = (Substring_T) List_head(new->substrings_Nto1);
+ substring_for_concordance = substring1; /* (Substring_T) List_head(new->substrings_1toN); */
+ substring_other = substringN; /* (Substring_T) List_head(new->substrings_Nto1); */
debug0(printf("Since second read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
}
@@ -5282,8 +5294,6 @@ Stage3end_new_splice (int *found_score_overall, int *found_score_within_trims,
new->genomicstart = Substring_genomicstart(substring_for_concordance);
new->genomicend = Substring_genomicend(substring_for_concordance);
new->plusp = Substring_plusp(substring_for_concordance);
-
-
}
#ifdef DEBUG0
@@ -5427,7 +5437,7 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
T new;
Substring_T substring_for_concordance; /* always the inner substring */
Substring_T substring_other; /* the outer substring */
- Substring_T substring;
+ Substring_T substring, substring1, substringN;
Substring_T donor, acceptor;
Junction_T junction;
@@ -5532,14 +5542,19 @@ Stage3end_new_distant (int *found_score_overall, int *found_score_within_trims,
/* Done assigning substrings */
+ substring1 = (Substring_T) List_head(new->substrings_1toN);
+ substringN = (Substring_T) List_head(new->substrings_Nto1);
+ new->querystart_chrbound = Substring_querystart_chrbound(substring1);
+ new->queryend_chrbound = Substring_queryend_chrbound(substringN);
+
/* Translocation. Concordant substring is the inner one */
if (first_read_p == true) {
- substring_for_concordance = (Substring_T) List_head(new->substrings_Nto1);
- substring_other = (Substring_T) List_head(new->substrings_1toN);
+ substring_for_concordance = substringN; /* (Substring_T) List_head(new->substrings_Nto1); */
+ substring_other = substring1; /* (Substring_T) List_head(new->substrings_1toN); */
debug0(printf("Since first read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
} else {
- substring_for_concordance = (Substring_T) List_head(new->substrings_1toN);
- substring_other = (Substring_T) List_head(new->substrings_Nto1);
+ substring_for_concordance = substring1; /* (Substring_T) List_head(new->substrings_1toN); */
+ substring_other = substringN; /* (Substring_T) List_head(new->substrings_Nto1); */
debug0(printf("Since second read, substring for concordance is at chr %d\n",Substring_chrnum(substring_for_concordance)));
}
@@ -6248,7 +6263,7 @@ Stage3end_filter (List_T hits, Hitlistpool_T hitlistpool, int max_mismatches, in
printf("Comparing score %d against max_mismatches %d, and coverage %d against min_coverage %d\n",
hit->score,max_mismatches,hit->querylength - hit->trim_querystart - hit->trim_queryend,min_coverage);
#endif
- if (hit->score_overall > max_mismatches) {
+ if (hit->score_overall > max_mismatches + hit->querystart_chrbound + (hit->querylength - hit->queryend_chrbound)) {
Stage3end_free(&hit);
} else if (hit->querylength - hit->trim_querystart - hit->trim_queryend < min_coverage) {
Stage3end_free(&hit);
@@ -6264,7 +6279,7 @@ Stage3end_filter (List_T hits, Hitlistpool_T hitlistpool, int max_mismatches, in
printf("Comparing score_ignore_trim %d against max_mismatches %d, and coverage %d against min_coverage %d\n",
hit->score_ignore_trim,max_mismatches,hit->querylength - hit->trim_querystart - hit->trim_queryend,min_coverage);
#endif
- if (hit->score_within_trims > max_mismatches) {
+ if (hit->score_within_trims > max_mismatches + hit->querystart_chrbound + (hit->querylength - hit->queryend_chrbound)) {
Stage3end_free(&hit);
} else if (hit->querylength - hit->trim_querystart - hit->trim_queryend < min_coverage) {
Stage3end_free(&hit);
=====================================
src/stage3hrdef.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: stage3hrdef.h 221582 2020-01-30 21:28:29Z twu $ */
+/* $Id: stage3hrdef.h 222150 2020-03-13 22:28:19Z twu $ */
#ifndef STAGE3HRDEF_INCLUDED
#define STAGE3HRDEF_INCLUDED
@@ -47,6 +47,8 @@ struct T {
Univcoord_T chrhigh;
Chrpos_T chrlength;
+ int querystart_chrbound;
+ int queryend_chrbound;
int querylength; /* Needed for overlap and pairlength calculations */
int querylength_adj; /* Adjusted for insertions */
=====================================
src/substring.c
=====================================
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: substring.c 220326 2019-09-12 20:40:15Z twu $";
+static char rcsid[] = "$Id: substring.c 222150 2020-03-13 22:28:19Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -261,6 +261,10 @@ struct T {
int queryend_pretrim; /* From original invocation, before trimming */
int querystart; /* For part that aligns to genome, post-trim */
int queryend;
+
+ int querystart_chrbound; /* Bound on alignment if this were substring1 */
+ int queryend_chrbound; /* Bound on alignment if this were substringN */
+
int amb_splice_pos; /* Used for ambiguous substrings. Based on left, not querystart */
int querylength;
@@ -1707,9 +1711,32 @@ Substring_new (int nmismatches, Univcoord_T left, int querystart, int queryend,
if (plusp == true) {
new->genomicstart = left;
new->genomicend = left + querylength;
+
+ if (left + querylength > chroffset + querylength) {
+ new->querystart_chrbound = 0;
+ } else {
+ new->querystart_chrbound = chroffset - left;
+ }
+ if (left + querylength < chrhigh) {
+ new->queryend_chrbound = querylength;
+ } else {
+ new->queryend_chrbound = chrhigh - left;
+ }
+
} else {
new->genomicstart = left + querylength;
new->genomicend = left;
+
+ if (left + querylength < chrhigh) {
+ new->querystart_chrbound = 0;
+ } else {
+ new->querystart_chrbound = (left + querylength) - chrhigh;
+ }
+ if (left + querylength > chroffset + querylength) {
+ new->queryend_chrbound = querylength;
+ } else {
+ new->queryend_chrbound = (left + querylength) - chroffset;
+ }
}
new->trim_querystart_splicep = splice_querystart_p;
@@ -2621,9 +2648,32 @@ Substring_new_simple (int nmismatches, Univcoord_T left, int querystart, int que
if (plusp == true) {
new->genomicstart = left;
new->genomicend = left + querylength;
+
+ if (left + querylength > chroffset + querylength) {
+ new->querystart_chrbound = 0;
+ } else {
+ new->querystart_chrbound = chroffset - left;
+ }
+ if (left + querylength < chrhigh) {
+ new->queryend_chrbound = querylength;
+ } else {
+ new->queryend_chrbound = chrhigh - left;
+ }
+
} else {
new->genomicstart = left + querylength;
new->genomicend = left;
+
+ if (left + querylength < chrhigh) {
+ new->querystart_chrbound = 0;
+ } else {
+ new->querystart_chrbound = (left + querylength) - chrhigh;
+ }
+ if (left + querylength > chroffset + querylength) {
+ new->queryend_chrbound = querylength;
+ } else {
+ new->queryend_chrbound = (left + querylength) - chroffset;
+ }
}
new->amb_splice_pos = 0;
@@ -2790,6 +2840,9 @@ Substring_new_alts_D (int querystart, int queryend, int splice_pos, int querylen
new->genomicend = Univcoord_max(alts_coords,alts_ncoords);
}
+ new->querystart_chrbound = 0;
+ new->queryend_chrbound = querylength;
+
new->start_endtype = END;
new->end_endtype = END;
@@ -2899,6 +2952,9 @@ Substring_new_alts_A (int querystart, int queryend, int splice_pos, int querylen
new->genomicend = Univcoord_max(alts_coords,alts_ncoords);
}
+ new->querystart_chrbound = 0;
+ new->queryend_chrbound = querylength;
+
new->start_endtype = END;
new->end_endtype = END;
@@ -3524,6 +3580,17 @@ Substring_querylength (T this) {
return this->querylength;
}
+int
+Substring_querystart_chrbound (T this) {
+ return this->querystart_chrbound;
+}
+
+int
+Substring_queryend_chrbound (T this) {
+ return this->queryend_chrbound;
+}
+
+
int
Substring_match_length (T this) {
if (this == NULL) {
@@ -3893,6 +3960,9 @@ Substring_copy (T old) {
new->queryend_pretrim = old->queryend_pretrim;
new->querystart = old->querystart;
new->queryend = old->queryend;
+ new->querystart_chrbound = old->querystart_chrbound;
+ new->queryend_chrbound = old->queryend_chrbound;
+
new->amb_splice_pos = old->amb_splice_pos;
new->querylength = old->querylength;
=====================================
src/substring.h
=====================================
@@ -1,4 +1,4 @@
-/* $Id: substring.h 220326 2019-09-12 20:40:15Z twu $ */
+/* $Id: substring.h 222150 2020-03-13 22:28:19Z twu $ */
#ifndef SUBSTRING_INCLUDED
#define SUBSTRING_INCLUDED
@@ -215,6 +215,11 @@ extern int
Substring_queryend (T this);
extern int
Substring_querylength (T this);
+extern int
+Substring_querystart_chrbound (T this);
+extern int
+Substring_queryend_chrbound (T this);
+
extern int
Substring_match_length (T this);
extern int
=====================================
util/gmap_cat.pl.in
=====================================
@@ -10,11 +10,11 @@ my $bindir = "@BINDIR@"; # dirname(__FILE__)
use IO::File;
use Getopt::Long;
+use File::Basename;
Getopt::Long::Configure(qw(no_auto_abbrev no_ignore_case_always));
GetOptions(
- 'F|sourcedirs=s' => \$sourcedirs, # source directories
'D|dir=s' => \$destdir, # destination directory
'd|db=s' => \$dbname, # genome name
'n|names=s' => \$contig_name_file, # substitute contig names
@@ -41,12 +41,7 @@ if (!defined($dbname)) {
}
}
-if (!defined($sourcedirs)) {
- print STDERR "Source directories not defined with -F, so using the default directory $gmapdb\n";
- @sourcedirs = ($gmapdb);
-} else {
- @sourcedirs = split ",",$sourcedirs;
-}
+
if (!defined($destdir) || $destdir !~ /\S/) {
print STDERR "Destination directory not defined with -D flag, so writing to $gmapdb\n";
$destdir = $gmapdb;
@@ -111,13 +106,10 @@ $IIT = new IO::File(" | $bindir/iit_store -1 -o $dbdir/$dbname.chromosome");
%seenp = ();
$genomelen = 0;
foreach $input_genome (@input_genomes) {
- $i = 0;
- while ($i <= $#sourcedirs &&
- !defined($FP = new IO::File("$sourcedirs[$i]/$input_genome/$input_genome.chromosome"))) {
- $i++;
- }
- if (!defined($FP)) {
- die "Cannot open $input_genome/$input_genome.chromosome in any of the given sourcedirs";
+ $input_dbroot = basename($input_genome);
+ if (!defined($FP = new IO::File("$input_genome/$input_dbroot.chromosome"))) {
+ print STDERR "Expecting full paths to input genomes";
+ die "Cannot open $input_genome/$input_dbroot.chromosome in any of the given sourcedirs";
}
while (defined($line = <$FP>)) {
@@ -169,7 +161,7 @@ close($OUT);
# (2) Merge genomecomp and genomebits
print STDERR "Merging genome strings\n";
-$cmd = "\"$bindir/gmapindex\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" -C " . join(" ", at input_genomes);
+$cmd = "\"$bindir/gmapindex\" -D \"$dbdir\" -d \"$dbname\" -C " . join(" ", at input_genomes);
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -182,7 +174,7 @@ unshuffle_genome($bindir,$dbdir,$dbname,$genomecompfile);
# (3) Merge indexdbs (offsets and positions)
print STDERR "Merging offsets and positions\n";
-$cmd = "\"$bindir/indexdb_cat\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" " . join(" ", at input_genomes);
+$cmd = "\"$bindir/indexdb_cat\" -D \"$dbdir\" -d \"$dbname\" " . join(" ", at input_genomes);
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -191,7 +183,7 @@ if (($rc = system($cmd)) != 0) {
# (4) Merge regiondb
print STDERR "Merging regional hash tables\n";
-$cmd = "\"$bindir/gmapindex\" -F \"$sourcedirs\" -D \"$dbdir\" -d \"$dbname\" -R " . join(" ", at input_genomes);
+$cmd = "\"$bindir/gmapindex\" -D \"$dbdir\" -d \"$dbname\" -R " . join(" ", at input_genomes);
print STDERR "Running $cmd\n";
if (($rc = system($cmd)) != 0) {
die "$cmd failed with return code $rc";
@@ -260,11 +252,11 @@ sub print_usage {
gmap_add: Concatenates gmap databases to be used by GMAP or GSNAP.
Part of GMAP package, version $package_version.
-Usage: gmap_cat [options...] -d <output_genome_name> <input_genome_names...>
+Usage: gmap_cat [options...] -d <output_genome_name> <path/to/genome_dir...>
+
+Note: Input genomes should contain a full or local path for the directory containing the genome index
Options:
- -F, --sourcedirs=STRING Directory or directories for input genome indices. May specify multiple directories by
- separating them with a comma. For each input genome, they will be searched in the given order.
-D, --dir=STRING Destination directory for output genome index
-d, --db=STRING Output genome name
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/6351a1f254104267b0a877812cd1eca7a8d873ac...580578c81a6f0a4856a08239759752a63be98e0e
--
View it on GitLab: https://salsa.debian.org/med-team/gmap/-/compare/6351a1f254104267b0a877812cd1eca7a8d873ac...580578c81a6f0a4856a08239759752a63be98e0e
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20200411/70766592/attachment-0001.html>
More information about the debian-med-commit
mailing list