[med-svn] [gmap] 01/05: New upstream version 2017-09-30
Alex Mestiashvili
malex-guest at moszumanska.debian.org
Mon Oct 2 11:39:31 UTC 2017
This is an automated email from the git hooks/post-receive script.
malex-guest pushed a commit to branch master
in repository gmap.
commit b588a6101504d84da4d6f572f90a8e5668277f8c
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date: Mon Oct 2 09:59:27 2017 +0200
New upstream version 2017-09-30
---
ChangeLog | 32 +++++++
VERSION | 2 +-
configure | 24 ++---
src/Makefile.am | 4 +-
src/Makefile.in | 30 ++++---
src/chimera.c | 4 +-
src/gmap.c | 41 ++++++---
src/gsnap.c | 4 +-
src/pair.c | 150 +++++++++++++++++++++++++------
src/pair.h | 7 +-
src/stage3.c | 44 ++++++---
src/translation.c | 54 +++++++++--
src/uinttableuint.c | 252 ++++++++++++++++++++++++++++++++++++++++++++++++++++
src/uinttableuint.h | 33 +++++++
src/uniqscan.c | 4 +-
15 files changed, 597 insertions(+), 88 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index f8bfc3b..e5cd7b1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,35 @@
+2017-09-29 twu
+
+ * VERSION, public-2017-09-05, src: Updated version number
+
+ * stage3.c: Merged revision 210194 from trunk to move build_dual_breaks step
+
+2017-09-27 twu
+
+ * stage3.c: Merged revision 210170 from trunk to stop over-aggressive use of
+ maxintronlen_end from wrong side of the sequence
+
+ * chimera.c: Merged revision 210169 from trunk to initialize a variable
+
+2017-09-23 twu
+
+ * VERSION, public-2017-09-05, src, uinttableuint.c, uinttableuint.h: Merged
+ revision 210071 from trunk to add files
+
+ * gsnap.c, pair.c, pair.h, public-2017-09-05, src, uniqscan.c: Merged
+ revision 210062 from trunk to fix computation of cds bounds for GFF3
+ output
+
+ * translation.c: Merged revision 210057 from trunk to assign aaphase_g for
+ final genomic codon
+
+ * gmap.c: Merged revision 210059 from trunk to restore MAX_CHIMERA_ITER to
+ 3, but not iterating multiple times for middle pieces. Added option
+ --gff3-cds
+
+ * Makefile.gsnaptoo.am: Merged revision 210058 from trunk to add
+ uinttableuint to library
+
2017-09-11 twu
* VERSION: Updated version number
diff --git a/VERSION b/VERSION
index 5c57d19..2079994 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2017-09-11
\ No newline at end of file
+2017-09-30
\ No newline at end of file
diff --git a/configure b/configure
index 9f64c4b..1400e2e 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for gmap 2017-09-11.
+# Generated by GNU Autoconf 2.69 for gmap 2017-09-30.
#
# Report bugs to <Thomas Wu <twu at gene.com>>.
#
@@ -590,8 +590,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='gmap'
PACKAGE_TARNAME='gmap'
-PACKAGE_VERSION='2017-09-11'
-PACKAGE_STRING='gmap 2017-09-11'
+PACKAGE_VERSION='2017-09-30'
+PACKAGE_STRING='gmap 2017-09-30'
PACKAGE_BUGREPORT='Thomas Wu <twu at gene.com>'
PACKAGE_URL=''
@@ -1369,7 +1369,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures gmap 2017-09-11 to adapt to many kinds of systems.
+\`configure' configures gmap 2017-09-30 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1440,7 +1440,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of gmap 2017-09-11:";;
+ short | recursive ) echo "Configuration of gmap 2017-09-30:";;
esac
cat <<\_ACEOF
@@ -1577,7 +1577,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-gmap configure 2017-09-11
+gmap configure 2017-09-30
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2183,7 +2183,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by gmap $as_me 2017-09-11, which was
+It was created by gmap $as_me 2017-09-30, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2533,8 +2533,8 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking package version" >&5
$as_echo_n "checking package version... " >&6; }
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-09-11" >&5
-$as_echo "2017-09-11" >&6; }
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 2017-09-30" >&5
+$as_echo "2017-09-30" >&6; }
### Read defaults
@@ -4401,7 +4401,7 @@ fi
# Define the identity of the package.
PACKAGE='gmap'
- VERSION='2017-09-11'
+ VERSION='2017-09-30'
cat >>confdefs.h <<_ACEOF
@@ -19978,7 +19978,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by gmap $as_me 2017-09-11, which was
+This file was extended by gmap $as_me 2017-09-30, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -20044,7 +20044,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-gmap config.status 2017-09-11
+gmap config.status 2017-09-30
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
diff --git a/src/Makefile.am b/src/Makefile.am
index f73ce68..cf8f009 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,7 +21,7 @@ include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
interval.h uintlist.h uint8list.h \
iitdef.h iit-read.h iit-write.h parserange.h \
univinterval.h iit-read-univ.h \
- table.h tableuint.h uinttable.h \
+ table.h tableuint.h uinttable.h uinttableuint.h \
stopwatch.h semaphore.h access.h \
chrom.h filestring.h \
md5.h complement.h bzip2.h sequence.h \
@@ -88,7 +88,7 @@ LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
- table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h uinttableuint.c uinttableuint.h\
chrom.c chrom.h filestring.c filestring.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
genomicpos.c genomicpos.h \
diff --git a/src/Makefile.in b/src/Makefile.in
index 5b6c083..0712337 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -188,15 +188,15 @@ am__objects_1 = libgmap_la-except.lo libgmap_la-assert.lo \
libgmap_la-iit-read-univ.lo libgmap_la-stopwatch.lo \
libgmap_la-semaphore.lo libgmap_la-access.lo \
libgmap_la-table.lo libgmap_la-tableuint.lo \
- libgmap_la-uinttable.lo libgmap_la-chrom.lo \
- libgmap_la-filestring.lo libgmap_la-md5.lo libgmap_la-bzip2.lo \
- libgmap_la-sequence.lo libgmap_la-genomicpos.lo \
- libgmap_la-bitpack64-read.lo libgmap_la-bitpack64-readtwo.lo \
- libgmap_la-maxent_hr.lo libgmap_la-popcount.lo \
- libgmap_la-genome128_hr.lo libgmap_la-compress.lo \
- libgmap_la-bytecoding.lo libgmap_la-sarray-read.lo \
- libgmap_la-chrnum.lo libgmap_la-genome.lo \
- libgmap_la-datadir.lo
+ libgmap_la-uinttable.lo libgmap_la-uinttableuint.lo \
+ libgmap_la-chrom.lo libgmap_la-filestring.lo libgmap_la-md5.lo \
+ libgmap_la-bzip2.lo libgmap_la-sequence.lo \
+ libgmap_la-genomicpos.lo libgmap_la-bitpack64-read.lo \
+ libgmap_la-bitpack64-readtwo.lo libgmap_la-maxent_hr.lo \
+ libgmap_la-popcount.lo libgmap_la-genome128_hr.lo \
+ libgmap_la-compress.lo libgmap_la-bytecoding.lo \
+ libgmap_la-sarray-read.lo libgmap_la-chrnum.lo \
+ libgmap_la-genome.lo libgmap_la-datadir.lo
dist_libgmap_la_OBJECTS = $(am__objects_1)
libgmap_la_OBJECTS = $(dist_libgmap_la_OBJECTS)
AM_V_lt = $(am__v_lt_@AM_V@)
@@ -2760,7 +2760,7 @@ include_HEADERS = fopen.h bool.h types.h separator.h comp.h \
interval.h uintlist.h uint8list.h \
iitdef.h iit-read.h iit-write.h parserange.h \
univinterval.h iit-read-univ.h \
- table.h tableuint.h uinttable.h \
+ table.h tableuint.h uinttable.h uinttableuint.h \
stopwatch.h semaphore.h access.h \
chrom.h filestring.h \
md5.h complement.h bzip2.h sequence.h \
@@ -2781,7 +2781,7 @@ LIBGMAP_LA_FILES = fopen.h bool.h types.h separator.h comp.h \
iit-read.c iit-read.h iit-write.c iit-write.h parserange.c parserange.h \
univinterval.c univinterval.h iit-read-univ.c iit-read-univ.h \
stopwatch.c stopwatch.h semaphore.c semaphore.h access.c access.h \
- table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h \
+ table.c table.h tableuint.c tableuint.h uinttable.c uinttable.h uinttableuint.c uinttableuint.h\
chrom.c chrom.h filestring.c filestring.h \
md5.c md5.h complement.h bzip2.c bzip2.h sequence.c sequence.h \
genomicpos.c genomicpos.h \
@@ -6498,6 +6498,7 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uint8list.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uintlist.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uinttable.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-uinttableuint.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/libgmap_la-univinterval.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sam_sort-access.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sam_sort-assert.Po@am__quote@
@@ -6908,6 +6909,13 @@ libgmap_la-uinttable.lo: uinttable.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uinttable.lo `test -f 'uinttable.c' || echo '$(srcdir)/'`uinttable.c
+libgmap_la-uinttableuint.lo: uinttableuint.c
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-uinttableuint.lo -MD -MP -MF $(DEPDIR)/libgmap_la-uinttableuint.Tpo -c -o libgmap_la-uinttableuint.lo `test -f 'uinttableuint.c' || echo '$(srcdir)/'`uinttableuint.c
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-uinttableuint.Tpo $(DEPDIR)/libgmap_la-uinttableuint.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='uinttableuint.c' object='libgmap_la-uinttableuint.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -c -o libgmap_la-uinttableuint.lo `test -f 'uinttableuint.c' || echo '$(srcdir)/'`uinttableuint.c
+
libgmap_la-chrom.lo: chrom.c
@am__fastdepCC_TRUE@ $(AM_V_CC)$(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgmap_la_CFLAGS) $(CFLAGS) -MT libgmap_la-chrom.lo -MD -MP -MF $(DEPDIR)/libgmap_la-chrom.Tpo -c -o libgmap_la-chrom.lo `test -f 'chrom.c' || echo '$(srcdir)/'`chrom.c
@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/libgmap_la-chrom.Tpo $(DEPDIR)/libgmap_la-chrom.Plo
diff --git a/src/chimera.c b/src/chimera.c
index ba7a23f..86dc981 100644
--- a/src/chimera.c
+++ b/src/chimera.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: chimera.c 206139 2017-05-11 03:28:35Z twu $";
+static char rcsid[] = "$Id: chimera.c 210171 2017-09-27 22:22:11Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -699,7 +699,7 @@ Chimera_find_breakpoint (int *chimeraequivpos, int *rangelow, int *rangehigh,
/* Use secondbest to find a range for exon-exon searching */
- *rangelow = 0;
+ *rangelow = *rangehigh = 0;
for (pos = 0; pos < queryntlength - 1; pos++) {
if (gapp_sub1[pos] == false) {
if (gapp_sub2[pos+1] == false) {
diff --git a/src/gmap.c b/src/gmap.c
index 8c9e698..f649c12 100644
--- a/src/gmap.c
+++ b/src/gmap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gmap.c 209801 2017-09-11 21:58:12Z twu $";
+static char rcsid[] = "$Id: gmap.c 210067 2017-09-23 00:16:06Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -120,7 +120,11 @@ static char rcsid[] = "$Id: gmap.c 209801 2017-09-11 21:58:12Z twu $";
#define MAX_BADOLIGOS 0.30 /* Setting to 1.0 effectively turns this check off */
#define MAX_REPOLIGOS 0.40 /* Setting to 1.0 effectively turns this check off */
-#define MAX_CHIMERA_ITER 1 /* Values larger than 1 can lead to very long (or infinite?) run times */
+/* Value of 1 can miss end exons, but values larger than 1 can lead to
+ very long (or infinite?) run times when combined with
+ --intronlength */
+#define MAX_CHIMERA_ITER 3
+
#define CHIMERA_PENALTY 30 /* A small value for chimera_margin will reduce this */
#define CHIMERA_IDENTITY 0.98
#define CHIMERA_PVALUE 0.01
@@ -390,6 +394,7 @@ static bool require_splicedir_p = false;
/* GFF3 */
static bool gff3_separators_p = true;
static bool gff3_phase_swap_p = false;
+static CDStype_T cdstype = CDS_CDNA;
/* SAM */
/* Applicable to PMAP? */
@@ -571,6 +576,7 @@ static struct option long_options[] = {
{"gff3-add-separators", required_argument, 0, 0}, /* gff3_separators_p */
{"gff3-swap-phase", required_argument, 0, 0}, /* gff3_phase_swap_p */
+ {"gff3-cds", required_argument, 0, 0}, /* cdstype */
#ifndef PMAP
{"quality-protocol", required_argument, 0, 0}, /* quality_shift */
@@ -3898,7 +3904,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
if (*mergedp == true) {
testlocalp = true; /* Local merge */
- } else {
+ } else if (iter == 1) {
+ /* Check for middle pieces only on first iteration */
debug2(printf("Checking for middle piece local, starting with list length %d\n",List_length(stage3list)));
stage3list = check_middle_piece_local(&foundp,stage3list,queryseq,queryuc,
#ifdef PMAP
@@ -3910,6 +3917,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
/* Iterate */
testlocalp = true;
}
+ } else {
+ testlocalp = false;
}
}
}
@@ -3987,7 +3996,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
} else {
if (*mergedp == true) {
testchimerap = true; /* Local merge */
- } else {
+ } else if (iter == 1) {
+ /* Check for middle pieces only on first iteration */
debug2(printf("Checking for middle piece chimera, starting with list length %d\n",List_length(stage3list)));
stage3list = check_middle_piece_chimera(&foundp,stage3list,queryseq,queryuc,
#ifdef PMAP
@@ -4001,6 +4011,8 @@ apply_stage3 (bool *mergedp, Chimera_T *chimera, List_T gregions, Sequence_T que
} else {
testchimerap = false;
}
+ } else {
+ testchimerap = false;
}
}
debug2(printf("testchimerap is %d\n",testchimerap));
@@ -5410,11 +5422,6 @@ parse_command_line (int argc, char *argv[], int optind) {
} else if (!strcmp(long_name,"min-intronlength")) {
min_intronlength = atoi(check_valid_int(optarg));
- } else if (!strcmp(long_name,"intronlength")) {
- /* Included for backwards compatibility. Sets both
- --max-intronlength-middle and --max-intronlength-ends */
- maxintronlen = maxintronlen_ends = atoi(check_valid_int(optarg));
-
} else if (!strcmp(long_name,"max-intronlength-middle")) {
maxintronlen = atoi(check_valid_int(optarg));
@@ -5553,6 +5560,16 @@ parse_command_line (int argc, char *argv[], int optind) {
return 9;
}
+ } else if (!strcmp(long_name,"gff3-cds")) {
+ if (!strcmp(optarg,"cdna")) {
+ cdstype = CDS_CDNA;
+ } else if (!strcmp(optarg,"genomic")) {
+ cdstype = CDS_GENOMIC;
+ } else {
+ fprintf(stderr,"--gff3-cds flag must be cdna or genomic\n");
+ return 9;
+ }
+
#ifndef PMAP
} else if (!strcmp(long_name,"no-sam-headers")) {
sam_headers_p = false;
@@ -5663,6 +5680,8 @@ parse_command_line (int argc, char *argv[], int optind) {
}
break;
+ case 'K': maxintronlen = maxintronlen_ends = atoi(check_valid_int(optarg)); break;
+
case 'w': shortsplicedist = strtoul(check_valid_int(optarg),NULL,10); break;
case 'L': maxtotallen_bound = atoi(check_valid_int(optarg)); break;
@@ -6823,7 +6842,7 @@ main (int argc, char *argv[]) {
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/genomecomp_alt ? true : false,
/*print_nsnpdiffs_p*/genomecomp_alt ? true : false,genomelength,
- gff3_phase_swap_p,sam_cigar_extended_p);
+ gff3_phase_swap_p,cdstype,sam_cigar_extended_p);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
require_splicedir_p,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
@@ -7452,6 +7471,8 @@ Output options\n\
--gff3-swap-phase=INT Whether to swap phase (0 => 0, 1 => 2, 2 => 1) in gff3_gene format\n\
Needed by some analysis programs, but deviates from GFF3 specification\n\
Values: 0 (no, default), 1 (yes)\n\
+ --gff3-cds=STRING Whether to use cDNA or genomic translation for the CDS coordinates\n\
+ Values: cdna (default), genomic\n\
");
fprintf(stdout,"\n");
diff --git a/src/gsnap.c b/src/gsnap.c
index 130a893..cd61704 100644
--- a/src/gsnap.c
+++ b/src/gsnap.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: gsnap.c 207853 2017-06-29 20:33:16Z twu $";
+static char rcsid[] = "$Id: gsnap.c 210070 2017-09-23 00:17:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -3442,7 +3442,7 @@ worker_setup (char *genomesubdir, char *fileroot) {
force_xs_direction_p,md_lowercase_variant_p,
/*snps_p*/snps_iit ? true : false,print_nsnpdiffs_p,
Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
+ /*gff3_phase_swap_p*/false,/*cdstype*/CDS_CDNA,/*cigar_extended_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/true,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
diff --git a/src/pair.c b/src/pair.c
index 1d7fd5e..d75929e 100644
--- a/src/pair.c
+++ b/src/pair.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: pair.c 209799 2017-09-11 21:45:19Z twu $";
+static char rcsid[] = "$Id: pair.c 210070 2017-09-23 00:17:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -151,6 +151,7 @@ static bool print_nsnpdiffs_p;
static double genomelength; /* For BLAST E-value */
static bool gff3_phase_swap_p;
+static CDStype_T cdstype;
static bool cigar_extended_p;
@@ -158,7 +159,8 @@ void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in) {
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, CDStype_T cdstype_in,
+ bool cigar_extended_p_in) {
trim_mismatch_score = trim_mismatch_score_in;
trim_indel_score = trim_indel_score_in;
gff3_separators_p = gff3_separators_p_in;
@@ -169,6 +171,7 @@ Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
print_nsnpdiffs_p = print_nsnpdiffs_p_in;
genomelength = (double) genomelength_in;
gff3_phase_swap_p = gff3_phase_swap_p_in;
+ cdstype = cdstype_in;
cigar_extended_p = cigar_extended_p_in;
return;
@@ -2713,7 +2716,7 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend, exon_phase;
Chrpos_T exon_genomestart = -1, exon_genomeend, intron_start, intron_end;
- int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, i;
+ int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, last_valid_i, i;
int Mlength = 0, Ilength = 0, Dlength = 0;
List_T tokens = NULL;
char token[11];
@@ -2724,26 +2727,74 @@ print_gff3_exons_forward (Filestring_T fp, struct T *pairs, int npairs, int path
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
+ endi = npairs;
if (cds_p == false) {
starti = 0;
- endi = npairs;
- } else {
+ } else if (cdstype == CDS_CDNA) {
i = 0;
- while (i < npairs && (pairs[i].gapp == false && pairs[i].cdna != ' ' && pairs[i].aaphase_e == -1)) {
- i++;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].cdna == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_e == -1) {
+ i++;
+ } else {
+ starti = i;
+ last_valid_i = i;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].cdna == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_e != -1) {
+ last_valid_i = i;
+ i++;
+ } else {
+ endi = last_valid_i; /* inclusive */
+ i = npairs;
+ }
+ }
+ }
}
- starti = i;
-
- while (i < npairs && (pairs[i].gapp == true || pairs[i].cdna == ' ' || pairs[i].aaphase_e != -1)) {
- i++;
+
+ } else if (cdstype == CDS_GENOMIC) {
+ i = 0;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].genome == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_g == -1) {
+ i++;
+ } else {
+ starti = i;
+ last_valid_i = i;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].genome == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_g != -1) {
+ last_valid_i = i;
+ i++;
+ } else {
+ endi = last_valid_i; /* inclusive */
+ i = npairs;
+ }
+ }
+ }
}
- endi = i;
+
+ } else {
+ fprintf(stderr,"Do not recognize cdstype %d\n",cdstype);
+ abort();
}
ptr = &(pairs[starti]);
- for (i = starti; i < endi; i++) {
+ for (i = starti; i <= endi; i++) {
/* prev = this; */
this = ptr++;
@@ -2975,7 +3026,7 @@ print_gff3_exons_backward (Filestring_T fp, struct T *pairs, int npairs, int pat
struct T *ptr, *this = NULL;
int exon_querystart = -1, exon_queryend, exon_phase;
Chrpos_T exon_genomestart = -1, exon_genomeend;
- int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, i;
+ int pctidentity, num = 0, den = 0, exonno = 0, cdsno = 0, starti, endi, last_valid_i, i;
#if 0
int intronno = 0;
Chrpos_T intron_start, intron_end;
@@ -2983,26 +3034,73 @@ print_gff3_exons_backward (Filestring_T fp, struct T *pairs, int npairs, int pat
int last_querypos = -1;
Chrpos_T last_genomepos = -1U;
+ endi = npairs;
if (cds_p == false) {
starti = 0;
- endi = npairs;
- } else {
+ } else if (cdstype == CDS_CDNA) {
i = 0;
- while (i < npairs && (pairs[i].gapp == false && pairs[i].cdna != ' ' && pairs[i].aaphase_e == -1)) {
- i++;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].cdna == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_e == -1) {
+ i++;
+ } else {
+ starti = i;
+ last_valid_i = i;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].cdna == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_e != -1) {
+ last_valid_i = i;
+ i++;
+ } else {
+ endi = last_valid_i; /* inclusive */
+ i = npairs;
+ }
+ }
+ }
}
- starti = i;
-
- while (i < npairs && (pairs[i].gapp == true || pairs[i].cdna == ' ' || pairs[i].aaphase_e != -1)) {
- i++;
+
+ } else if (cdstype == CDS_GENOMIC) {
+ i = 0;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].genome == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_g == -1) {
+ i++;
+ } else {
+ starti = i;
+ last_valid_i = i;
+ while (i < npairs) {
+ if (pairs[i].gapp == true) {
+ i++;
+ } else if (pairs[i].genome == ' ') {
+ i++;
+ } else if (pairs[i].aaphase_g != -1) {
+ last_valid_i = i;
+ i++;
+ } else {
+ endi = last_valid_i; /* inclusive */
+ i = npairs;
+ }
+ }
+ }
}
- endi = i;
- }
+ } else {
+ fprintf(stderr,"Do not recognize cdstype %d\n",cdstype);
+ abort();
+ }
- ptr = &(pairs[endi-1]);
- for (i = endi-1; i >= starti; i--) {
+ ptr = &(pairs[endi]);
+ for (i = endi; i >= starti; i--) {
/* prev = this; */
this = ptr--;
diff --git a/src/pair.h b/src/pair.h
index e9e110b..62494b6 100644
--- a/src/pair.h
+++ b/src/pair.h
@@ -1,4 +1,4 @@
-/* $Id: pair.h 207201 2017-06-12 18:40:57Z twu $ */
+/* $Id: pair.h 210070 2017-09-23 00:17:54Z twu $ */
#ifndef PAIR_INCLUDED
#define PAIR_INCLUDED
@@ -27,13 +27,16 @@ typedef struct Pair_T *Pair_T;
#define MATCHESPERGAP 3
+typedef enum {CDS_CDNA, CDS_GENOMIC} CDStype_T;
+
#define T Pair_T
extern void
Pair_setup (int trim_mismatch_score_in, int trim_indel_score_in,
bool gff3_separators_p_in, bool sam_insert_0M_p_in, bool force_xs_direction_p_in,
bool md_lowercase_variant_p_in, bool snps_p_in, bool print_nsnpdiffs_p_in,
- Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, bool cigar_extended_p_in);
+ Univcoord_T genomelength_in, bool gff3_phase_swap_p_in, CDStype_T cdstype,
+ bool cigar_extended_p_in);
extern int
Pair_querypos (T this);
extern Chrpos_T
diff --git a/src/stage3.c b/src/stage3.c
index a3d2a74..58d2638 100644
--- a/src/stage3.c
+++ b/src/stage3.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: stage3.c 208645 2017-07-28 00:55:34Z twu $";
+static char rcsid[] = "$Id: stage3.c 210195 2017-09-29 15:12:33Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -102,6 +102,7 @@ static char rcsid[] = "$Id: stage3.c 208645 2017-07-28 00:55:34Z twu $";
#define MICROEXON_PROB_MISMATCH 0.80
#define END_MIN_EXONLENGTH 12
+#define END_SUFFICIENT_EXONLENGTH 24 /* Defines length beyond which we can ignore maxintronlen_ends */
#if 0
/* No longer used. Not sure why it was used before */
@@ -4022,7 +4023,7 @@ trim_end5_exons (bool *indelp, bool *trim5p, int ambig_end_length, List_T pairs,
#endif
} else {
- if (splice->genomejump > maxintronlen_ends) {
+ if (nmatches < END_SUFFICIENT_EXONLENGTH && splice->genomejump > maxintronlen_ends) {
debug3(printf("End intron is too long, so trimming it\n"));
path = (List_T) NULL;
*trim5p = true;
@@ -4450,7 +4451,7 @@ trim_end3_exons (bool *indelp, bool *trim3p, int ambig_end_length, List_T path,
#endif
} else {
- if (splice->genomejump > maxintronlen_ends) {
+ if (nmatches < END_SUFFICIENT_EXONLENGTH && splice->genomejump > maxintronlen_ends) {
debug3(printf("End intron is too long, so trimming it\n"));
pairs = (List_T) NULL;
*trim3p = true;
@@ -9046,10 +9047,7 @@ extend_ending5 (bool *knownsplicep, int *dynprogindex_minor,
} else if (*finalscore <= 0) {
*knownsplicep = false;
-#if 0
return (List_T) NULL;
-#endif
- return continuous_gappairs_distalgap;
} else {
return continuous_gappairs_distalgap;
}
@@ -9268,10 +9266,7 @@ extend_ending3 (bool *knownsplicep, int *dynprogindex_minor, int *finalscore,
} else if (*finalscore <= 0) {
*knownsplicep = false;
-#if 0
return (List_T) NULL;
-#endif
- return continuous_gappairs_distalgap;
} else {
return continuous_gappairs_distalgap;
}
@@ -12398,6 +12393,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
#endif
+#if 0
+ /* Moved per version 2016-09-14 to case 99 below */
/* Pass 4: Fix dual breaks */
debug(printf("\n*** Pass 4 (dir %d): Fix dual breaks. Iteration0 %d\n",cdna_direction,iter0));
/* pairs = remove_indel_gaps(path); */
@@ -12420,6 +12417,7 @@ path_compute_dir (double *defect_rate, List_T pairs,
return path;
}
#endif
+#endif
/* Pass 5: introns */
@@ -12526,15 +12524,38 @@ path_compute_dir (double *defect_rate, List_T pairs,
}
#endif
+
+#if 1
+ /* Moved per version 2016-09-14 from case 4 above */
+ /* Pass 99: Fix dual breaks */
+ /* >>pairs */
+ debug(printf("\n*** Pass 99 (dir %d): Fix dual breaks. Iteration0 %d\n",cdna_direction,iter0));
+ /* pairs = remove_indel_gaps(path); */
+ path = List_reverse(pairs);
+
+ pairs = build_dual_breaks(&dual_break_p,&dynprogindex_minor,&dynprogindex_major,path,
+ chrnum,chroffset,chrhigh,
+#ifdef PMAP
+ queryaaseq_ptr,
+#endif
+ queryseq_ptr,queryuc_ptr,querylength,
+ cdna_direction,watsonp,genestrand,jump_late_p,pairpool,
+ dynprogL,dynprogM,dynprogR,last_genomedp5,last_genomedp3,maxpeelback,
+ oligoindices_minor,diagpool,cellpool,
+ *defect_rate,/*finalp*/false,/*simplep*/true);
+#endif
+
+
+#if 0
path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
/*finalp*/false);
debug(Pair_dump_list(path,/*zerobasedp*/true));
-
pairs = List_reverse(path);
debug14(printf("Result of build_dual_breaks\n"));
debug14(Pair_dump_list(pairs,true));
debug(printf("Result of build_dual_breaks\n"));
debug(Pair_dump_list(pairs,true));
+#endif
#ifdef GSNAP
/* Too expensive to loop */
@@ -12545,7 +12566,8 @@ path_compute_dir (double *defect_rate, List_T pairs,
debug(printf("At end of outer loop: dual_break_p %d\n",dual_break_p));
}
- path = List_reverse(pairs);
+ path = insert_gapholders(pairs,queryseq_ptr,queryuc_ptr,chroffset,chrhigh,watsonp,pairpool,
+ /*finalp*/false);
return path;
}
diff --git a/src/translation.c b/src/translation.c
index 8112765..d81e423 100644
--- a/src/translation.c
+++ b/src/translation.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: translation.c 188718 2016-04-30 01:53:47Z twu $";
+static char rcsid[] = "$Id: translation.c 210069 2017-09-23 00:16:39Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -79,7 +79,7 @@ Translation_array_new (struct Pair_T *pairs, int translationlen) {
return new;
}
-#if 0
+#ifdef DEBUG
static void
Translation_dump (struct Pair_T *pairs, struct T *translation, int translationlen) {
int i;
@@ -1255,7 +1255,7 @@ assign_cdna_forward (int ncdna, struct Pair_T *pairs, int npairs, bool revcompp,
pair = &(pairs[i]);
codon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
debug2(Pair_dump_one(pair,true));
- debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+ debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
i = nexti;
j += 3;
}
@@ -1276,7 +1276,7 @@ terminate_cdna_forward (struct Pair_T *pairs, int npairs, bool revcompp, int sta
pair = &(pairs[i]);
lastcodon = pair->aa_e = get_codon_forward(&nexti,pairs,npairs,i,revcompp);
debug2(Pair_dump_one(pair,true));
- debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+ debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
i = nexti;
}
return;
@@ -1296,7 +1296,7 @@ assign_cdna_backward (int ncdna, struct Pair_T *pairs, int npairs, bool revcompp
pair = &(pairs[i]);
codon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
debug2(Pair_dump_one(pair,true));
- debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+ debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
i = nexti;
j += 3;
}
@@ -1319,7 +1319,7 @@ terminate_cdna_backward (struct Pair_T *pairs, int npairs, bool revcompp, int st
pair = &(pairs[i]);
lastcodon = pair->aa_e = get_codon_backward(&nexti,pairs,i,revcompp);
debug2(Pair_dump_one(pair,true));
- debug2(printf(" marked with amino acid %c\n",pair->aa_e));
+ debug2(printf(" marked with amino acid %c for cdna\n",pair->aa_e));
i = nexti;
}
return;
@@ -1335,6 +1335,8 @@ assign_genomic (int ngenomic, struct Pair_T *pairs, int npairs, int starti) {
while (j < ngenomic) {
pair = &(pairs[i]);
codon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
+ debug2(Pair_dump_one(pair,true));
+ debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
i = nexti;
j += 3;
}
@@ -1351,6 +1353,8 @@ terminate_genomic (struct Pair_T *pairs, int npairs, int starti) {
while (i <= npairs - 3 && lastcodon != '*') {
pair = &(pairs[i]);
lastcodon = pair->aa_g = get_codon_genomic(&nexti,pairs,npairs,i);
+ debug2(Pair_dump_one(pair,true));
+ debug2(printf(" marked with amino acid %c for genomic\n",pair->aa_g));
i = nexti;
}
return;
@@ -1658,7 +1662,7 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
char lastaa;
struct T *translation;
bool endstopp;
- int i, aapos = 0;
+ int i, j, aapos = 0;
Frame_T translation_frame;
int translation_starti = 0, translation_endi = 0, phase;
int minpos, maxpos;
@@ -1738,12 +1742,14 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
}
lastaa = pairs[i].aa_g;
aapos++;
+ /* printf("Assigning aaphase_g of zero to %d\n",i); */
pairs[i].aaphase_g = 0;
}
} else if (translation[i].frame != 3) {
if ((phase = translation_frame - translation[i].frame) < 0) {
phase += 3;
}
+ /* printf("Assigning aaphase_g of %d to %d\n",phase,i); */
pairs[i].aaphase_g = phase;
}
}
@@ -1768,6 +1774,22 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
}
#endif
+ j = i;
+ while (j < npairs && pairs[j].genome == ' ') {
+ j++;
+ }
+ if (j < npairs) {
+ /* printf("Assigning aaphase_g of one to %d\n",j); */
+ pairs[j++].aaphase_g = 1;
+ }
+ while (j < npairs && pairs[j].genome == ' ') {
+ j++;
+ }
+ if (j < npairs) {
+ /* printf("Assigning aaphase_g of two to %d\n",j); */
+ pairs[j].aaphase_g = 2;
+ }
+
/* Fill in aapos to the end */
for ( ; i < npairs; i++) {
pairs[i].aapos = aapos;
@@ -1790,12 +1812,14 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
}
lastaa = pairs[i].aa_g;
aapos++;
+ /* printf("Assigning aaphase_g of zero to %d\n",i); */
pairs[i].aaphase_g = 0;
}
} else if (translation[i].frame != 3) {
if ((phase = translation_frame - translation[i].frame) < 0) {
phase += 3;
}
+ /* printf("Assigning aaphase_g of %d to %d\n",phase,i); */
pairs[i].aaphase_g = phase;
}
}
@@ -1820,6 +1844,22 @@ Translation_via_genomic (int *translation_leftpos, int *translation_rightpos, in
}
#endif
+ j = i;
+ while (j >= 0 && pairs[j].genome == ' ') {
+ j--;
+ }
+ if (j >= 0) {
+ /* printf("Assigning aaphase_g of one to %d\n",j); */
+ pairs[j--].aaphase_g = 1;
+ }
+ while (j >= 0 && pairs[j].genome == ' ') {
+ j--;
+ }
+ if (j >= 0) {
+ /* printf("Assigning aaphase_g of two to %d\n",j); */
+ pairs[j].aaphase_g = 2;
+ }
+
/* Fill in aapos to the end */
for ( ; i >= 0; --i) {
pairs[i].aapos = aapos;
diff --git a/src/uinttableuint.c b/src/uinttableuint.c
new file mode 100644
index 0000000..1f3ff3f
--- /dev/null
+++ b/src/uinttableuint.c
@@ -0,0 +1,252 @@
+static char rcsid[] = "$Id: uinttableuint.c 210072 2017-09-23 00:41:47Z twu $";
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include "uinttableuint.h"
+#include <stdio.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdlib.h> /* For qsort */
+#include <string.h> /* For strcmp */
+#include "mem.h"
+#include "assert.h"
+/* Hash table from unsigned int keys to unsigned int values, using chained buckets. */
+#define T Uinttableuint_T
+struct T {
+ int size; /* number of buckets; set once in Uinttableuint_new */
+ int length; /* number of bindings currently stored */
+ unsigned int timestamp; /* counter advanced by every put and every remove */
+ struct binding {
+ struct binding *link; /* next binding in the same bucket, or NULL at the end of the chain */
+ unsigned int key;
+ unsigned int value;
+ unsigned int timeindex; /* value of the table timestamp when this binding was last put */
+ } **buckets; /* array of size chain heads, indexed by key % size */
+};
+
+/* Creates and returns an empty table.  hint (>= 0) only selects the bucket count: the largest
+ listed prime strictly below hint, and never less than 509. */
+T
+Uinttableuint_new (int hint) {
+ T table;
+ int i;
+ static int primes[] = { 509, 509, 1021, 2053, 4093,
+ 8191, 16381, 32771, 65521, INT_MAX };
+
+ assert(hint >= 0);
+ for (i = 1; primes[i] < hint; i++) { /* the INT_MAX sentinel always ends this scan */
+ }
+ table = (T) MALLOC(sizeof(*table) + /* header and bucket array share one allocation */
+ primes[i-1]*sizeof(table->buckets[0]));
+ table->size = primes[i-1];
+ table->buckets = (struct binding **)(table + 1); /* bucket array sits right after the header */
+ for (i = 0; i < table->size; i++) {
+ table->buckets[i] = NULL;
+ }
+ table->length = 0;
+ table->timestamp = 0;
+ return table;
+}
+/* Returns the value bound to key, or 0 when key is absent (a stored 0 looks the same as a miss). */
+unsigned int
+Uinttableuint_get (T table, const unsigned int key) {
+ int i;
+ struct binding *p;
+
+ assert(table);
+ /* assert(key); -- Doesn't hold for atomic 0 */
+ i = key % table->size; /* bucket that would hold key */
+ /* printf("Doing Uinttableuint_get on %s at bucket %d\n",(char *) key, i); */
+ for (p = table->buckets[i]; p; p = p->link) { /* scan the chain; p ends as NULL on a miss */
+ /* printf(" Comparing %s with %s at %p, key = %p\n",(char *) key, (char *) p->key, p, p->key); */
+ if (key == p->key) {
+ break;
+ }
+ }
+ return p ? p->value : 0;
+}
+/* Binds key to value (insert or overwrite) and returns the previous value, or 0 for a new key. */
+unsigned int
+Uinttableuint_put (T table, const unsigned int key, unsigned int value) {
+ int i;
+ struct binding *p;
+ unsigned int prev;
+
+ assert(table);
+ /* assert(key); -- Doesn't hold for atomic 0 */
+ i = key % table->size;
+ for (p = table->buckets[i]; p; p = p->link) {
+ if (key == p->key) {
+ break;
+ }
+ }
+ if (p == NULL) { /* key not present: push a new binding onto the front of the chain */
+ NEW(p);
+ p->key = key;
+ /* printf("Doing Uinttable_put at %p, key = %p\n",p,p->key); */
+ p->link = table->buckets[i];
+ table->buckets[i] = p;
+ table->length++;
+ prev = 0;
+ } else {
+ prev = p->value;
+ }
+ p->value = value;
+ p->timeindex = table->timestamp; /* remember when this key was last put */
+ table->timestamp++;
+ return prev;
+}
+/* Returns the number of bindings currently stored in table. */
+int
+Uinttableuint_length (T table) {
+ assert(table);
+ return table->length;
+}
+/* Calls apply(key, &value, cl) once per binding, bucket by bucket; apply may rewrite the value. */
+void
+Uinttableuint_map (T table,
+ void (*apply)(const unsigned int key, unsigned int *value, void *cl),
+ void *cl) {
+ int i;
+ struct binding *p;
+
+ assert(table);
+ assert(apply);
+ for (i = 0; i < table->size; i++)
+ for (p = table->buckets[i]; p; p = p->link) {
+ apply(p->key, &p->value, cl); /* cl is handed to apply unchanged */
+ }
+}
+/* Deletes the binding for key and returns its value, or returns 0 when key is not in table. */
+unsigned int
+Uinttableuint_remove (T table, const unsigned int key) {
+ int i;
+ struct binding **pp;
+
+ assert(table);
+ /* assert(key); -- Doesn't hold for atomic 0 */
+ table->timestamp++; /* advanced even when no binding ends up being removed */
+ i = key % table->size;
+ for (pp = &table->buckets[i]; *pp; pp = &(*pp)->link) { /* pp addresses the link that points at the current binding */
+ if (key == (*pp)->key) {
+ struct binding *p = *pp;
+ unsigned int value = p->value;
+ *pp = p->link; /* unlink p from the chain before freeing it */
+ FREE(p);
+ table->length--;
+ return value;
+ }
+ }
+ return 0;
+}
+/* qsort comparator: orders the unsigned ints that a and b point to ascending (returns -1, 0 or 1). */
+static int
+uint_compare (const void *a, const void *b) {
+ unsigned int x = * (unsigned int *) a;
+ unsigned int y = * (unsigned int *) b;
+
+ if (x < y) {
+ return -1;
+ } else if (y < x) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+/* Returns a newly allocated array of every key in table.  It has length+1 slots, so the last slot is
+ never written here.  Keys are ascending when sortp is true, otherwise in bucket order. */
+unsigned int *
+Uinttableuint_keys (T table, bool sortp) {
+ unsigned int *keyarray;
+ int i, j = 0;
+ struct binding *p;
+
+ assert(table);
+ keyarray = (unsigned int *) CALLOC(table->length+1,sizeof(unsigned int)); /* NOTE(review): caller presumably frees this -- confirm */
+ for (i = 0; i < table->size; i++) {
+ for (p = table->buckets[i]; p; p = p->link) {
+ keyarray[j++] = p->key;
+ }
+ }
+
+ if (sortp == true) {
+ qsort(keyarray,table->length,sizeof(unsigned int),uint_compare); /* the extra slot is left out of the sort */
+ }
+
+ return keyarray;
+}
+/* qsort comparator for an array of struct binding pointers: ascending timeindex, so the binding
+ that was put earliest sorts first (returns -1, 0 or +1). */
+static int
+timeindex_cmp (const void *x, const void *y) {
+ struct binding *a = * (struct binding **) x;
+ struct binding *b = * (struct binding **) y;
+
+ if (a->timeindex < b->timeindex) {
+ return -1;
+ } else if (a->timeindex > b->timeindex) {
+ return +1;
+ } else {
+ return 0;
+ }
+}
+/* Returns a newly allocated array of the table's keys ordered by when each key was last put,
+   earliest first.  One slot beyond length is allocated, so an empty table never requests zero elements. */
+unsigned int *
+Uinttableuint_keys_by_timeindex (T table) {
+  unsigned int *keyarray;
+  int i, j = 0;
+  struct binding **buckets, *p;
+
+  assert(table);
+  buckets = (struct binding **) CALLOC(table->length+1,sizeof(struct binding *)); /* scratch list of all bindings */
+  for (i = 0; i < table->size; i++) {
+    for (p = table->buckets[i]; p; p = p->link) {
+      buckets[j++] = p;
+    }
+  }
+  qsort(buckets,table->length,sizeof(struct binding *),timeindex_cmp); /* only the length filled slots are sorted */
+
+  keyarray = (unsigned int *) CALLOC(table->length+1,sizeof(unsigned int)); /* +1 as in Uinttableuint_keys: a zero-count request may fail or yield NULL */
+  for (j = 0; j < table->length; j++) {
+    p = buckets[j];
+    keyarray[j] = p->key;
+  }
+  FREE(buckets); /* only the scratch list is released; the bindings stay in the table */
+
+  return keyarray;
+}
+/* Returns a newly allocated array of every value in table, in bucket order (the order in which
+   Uinttableuint_keys lists keys when sortp is false).  Only the first length slots are filled. */
+unsigned int *
+Uinttableuint_values (T table) {
+  unsigned int *valuearray;
+  int i, j = 0;
+  struct binding *p;
+
+  assert(table);
+  valuearray = (unsigned int *) CALLOC(table->length+1,sizeof(unsigned int)); /* +1 as in Uinttableuint_keys, so an empty table never requests zero elements */
+  for (i = 0; i < table->size; i++) {
+    for (p = table->buckets[i]; p; p = p->link) {
+      valuearray[j++] = p->value;
+    }
+  }
+  return valuearray;
+}
+/* Frees every binding of *table and then the table itself; table and *table must be non-NULL. */
+void
+Uinttableuint_free (T *table) {
+ assert(table && *table);
+ if ((*table)->length > 0) { /* an empty table has no chains to walk */
+ int i;
+ struct binding *p, *q;
+ for (i = 0; i < (*table)->size; i++) {
+ for (p = (*table)->buckets[i]; p; p = q) {
+ q = p->link; /* read the successor before p is freed */
+ FREE(p);
+ }
+ }
+ }
+ FREE(*table); /* the bucket array lives in this same allocation (see Uinttableuint_new) */
+}
diff --git a/src/uinttableuint.h b/src/uinttableuint.h
new file mode 100644
index 0000000..217c8a2
--- /dev/null
+++ b/src/uinttableuint.h
@@ -0,0 +1,33 @@
+/* $Id: uinttableuint.h 210072 2017-09-23 00:41:47Z twu $ */
+#ifndef UINTTABLEUINT_INCLUDED
+#define UINTTABLEUINT_INCLUDED
+#include "bool.h"
+
+#define T Uinttableuint_T
+typedef struct T *T; /* opaque handle; struct T is defined in uinttableuint.c */
+/* Hash table mapping unsigned int keys to unsigned int values. */
+extern T
+Uinttableuint_new (int hint); /* new empty table; hint must be >= 0 and selects the bucket count */
+extern void
+Uinttableuint_free (T *table); /* frees all bindings and the table itself */
+extern int
+Uinttableuint_length (T table); /* number of bindings currently stored */
+extern unsigned int
+Uinttableuint_put (T table, const unsigned int key, unsigned int value); /* returns the previous value, or 0 if key was new */
+extern unsigned int
+Uinttableuint_get (T table, const unsigned int key); /* value bound to key, or 0 if absent */
+extern unsigned int
+Uinttableuint_remove (T table, const unsigned int key); /* value of the removed binding, or 0 if absent */
+extern void
+Uinttableuint_map (T table,
+ void (*apply)(const unsigned int key, unsigned int *value, void *cl),
+ void *cl); /* calls apply once per binding, passing cl through */
+extern unsigned int *
+Uinttableuint_keys (T table, bool sortp); /* new array of keys; ascending when sortp is true */
+extern unsigned int *
+Uinttableuint_keys_by_timeindex (T table); /* new array of keys, earliest-put key first */
+extern unsigned int *
+Uinttableuint_values (T table); /* new array of values, in bucket order */
+
+#undef T
+#endif
diff --git a/src/uniqscan.c b/src/uniqscan.c
index 121d20b..a7689d1 100644
--- a/src/uniqscan.c
+++ b/src/uniqscan.c
@@ -1,4 +1,4 @@
-static char rcsid[] = "$Id: uniqscan.c 207328 2017-06-14 19:47:20Z twu $";
+static char rcsid[] = "$Id: uniqscan.c 210070 2017-09-23 00:17:54Z twu $";
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
@@ -1327,7 +1327,7 @@ main (int argc, char *argv[]) {
/*force_xs_direction_p*/false,/*md_lowercase_variant_p*/false,
/*snps_p*/snps_iit ? true : false,/*print_nsnpdiffs_p*/snps_iit ? true : false,
Univ_IIT_genomelength(chromosome_iit,/*with_circular_alias*/false),
- /*gff3_phase_swap_p*/false,/*cigar_extended_p*/false);
+ /*gff3_phase_swap_p*/false,/*cdstype*/CDS_CDNA,/*cigar_extended_p*/false);
Stage3_setup(/*splicingp*/novelsplicingp == true || knownsplicingp == true,novelsplicingp,
/*require_splicedir_p*/false,splicing_iit,splicing_divint_crosstable,
donor_typeint,acceptor_typeint,splicesites,altlocp,alias_starts,alias_ends,
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gmap.git
More information about the debian-med-commit
mailing list