[med-svn] [hmmer2] 06/09: New upstream version 2.3.2+dfsg
Andreas Tille
tille at debian.org
Wed Apr 12 11:42:02 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository hmmer2.
commit c71b80a7d0202c55e768f8c28ae8e2377b720f3e
Author: Andreas Tille <tille at debian.org>
Date: Tue Apr 11 17:27:01 2017 +0200
New upstream version 2.3.2+dfsg
---
squid/00README | 39 -
squid/Makefile.in | 314 --
squid/Testsuite/00README | 21 -
squid/Testsuite/Makefile.in | 90 -
squid/Testsuite/bug-1-sfetch-paths | 29 -
squid/Testsuite/iospeed_main.c | 88 -
squid/Testsuite/rndspeed_main.c | 41 -
squid/Testsuite/sqdconfig_main.c | 33 -
squid/Testsuite/testsuite.pm | 44 -
squid/Testsuite/x-base-afetch | 11 -
squid/Testsuite/x-base-alistat | 14 -
squid/Testsuite/x-base-seqstat | 22 -
squid/Testsuite/x-base-sfetch | 9 -
squid/Testsuite/x-base-shuffle | 7 -
squid/Testsuite/x-base-sindex | 21 -
squid/Testsuite/x-base-sreformat | 13 -
squid/a2m.c | 115 -
squid/afetch_main.c | 184 -
squid/aligneval.c | 510 ---
squid/alignio.c | 642 ---
squid/alistat_main.c | 275 --
squid/clustal.c | 181 -
squid/cluster.c | 542 ---
squid/compalign_main.c | 223 -
squid/compstruct_main.c | 323 --
squid/config.sub | 1375 ------
squid/configure | 8042 ------------------------------------
squid/dayhoff.c | 173 -
squid/eps.c | 118 -
squid/file.c | 274 --
squid/getopt.c | 253 --
squid/gki.c | 390 --
squid/gki.h | 51 -
squid/gsi.c | 387 --
squid/gsi.h | 85 -
squid/gsi64.c | 397 --
squid/gsi64.h | 101 -
squid/hsregex.c | 1361 ------
squid/install-sh | 251 --
squid/iupac.c | 221 -
squid/msa.c | 1440 -------
squid/msa.h | 298 --
squid/msf.c | 391 --
squid/phylip.c | 176 -
squid/revcomp.c | 90 -
squid/revcomp_main.c | 108 -
squid/rk.c | 132 -
squid/rk.h | 40 -
squid/selex.c | 828 ----
squid/seqencode.c | 176 -
squid/seqsplit_main.c | 277 --
squid/seqstat_main.c | 238 --
squid/sfetch_main.c | 464 ---
squid/shuffle.c | 641 ---
squid/shuffle_main.c | 336 --
squid/sindex_main.c | 219 -
squid/sqerror.c | 94 -
squid/sqfuncs.h | 272 --
squid/sqio.c | 1933 ---------
squid/squid.h.in | 475 ---
squid/squidconf.h.in | 122 -
squid/squidcore.c | 55 -
squid/sre_ctype.c | 37 -
squid/sre_math.c | 334 --
squid/sre_random.c | 315 --
squid/sre_random.h | 18 -
squid/sre_string.c | 527 ---
squid/sreformat_main.c | 260 --
squid/ssi.c | 1530 -------
squid/ssi.h | 191 -
squid/stack.c | 101 -
squid/stockholm.c | 630 ---
squid/stockholm.h | 51 -
squid/stopwatch.c | 309 --
squid/stopwatch.h | 63 -
squid/test_main.c | 27 -
squid/translate.c | 84 -
squid/translate_main.c | 237 --
squid/types.c | 230 --
squid/vectorops.c | 299 --
squid/vectorops.h | 36 -
squid/weight.c | 751 ----
squid/weight_main.c | 189 -
83 files changed, 32294 deletions(-)
diff --git a/squid/00README b/squid/00README
deleted file mode 100644
index f04ba50..0000000
--- a/squid/00README
+++ /dev/null
@@ -1,39 +0,0 @@
-SQUID - library of functions for biological sequence analysis
-Copyright (C) 1992-2002 Washington University School of Medicine
-
-SQUID is a freely redistributable library of C code functions for
-sequence analysis. SQUID also includes a number of small utility
-programs.
-
-To install squid, see the file:
- INSTALL -- instructions for installing the programs
-
-If you have any questions about redistributing squid or using
-squid code in your own work, see the files:
- COPYRIGHT -- copyright notice, and information on my distribution policy
- LICENSE -- version 2 of the GNU Public License (see COPYRIGHT)
-
-For a web page with more information on squid, see:
- http://www.genetics.wustl.edu/eddy/software/#squid
-
-You can always download the latest stable release of squid from:
- ftp://ftp.genetics.wustl.edu/pub/eddy/software/squid.tar.gz
-
-The development codebase is available by anonymous CVS:
- cvs -d :pserver:anonymous at skynet.wustl.edu:/repository/sre login
- (password "anonymous")
- cvs -d :pserver:anonymous at skynet.wustl.edu:/repository/sre checkout squid
-
-If you encounter any bugs in this library, or you have any questions
-or comments, please e-mail me at the address below. Due to limited
-personal time, I may not respond, but I do read all my mail.
-
- Sean Eddy
- eddy at genetics.wustl.edu
-
- HHMI/Dept. of Genetics
- Washington University School of Medicine
- 660 South Euclid Box 8232
- Saint Louis Missouri 63110
- USA
-
diff --git a/squid/Makefile.in b/squid/Makefile.in
deleted file mode 100644
index 5f7df27..0000000
--- a/squid/Makefile.in
+++ /dev/null
@@ -1,314 +0,0 @@
-###############################################################
-# Makefile for SQUID library
-# CVS $Id: Makefile.in,v 1.53 2003/06/13 20:05:31 eddy Exp $
-#
-# Note: The autoconf variables in this file must be coordinated
-# with HMMER. HMMER creates a Makefile from this
-# Makefile.in using its own configure script, not SQUID's.
-#################################################################
-# HMMER - Biological sequence analysis with profile HMMs
-# Copyright (C) 1992-2003 Washington University School of Medicine
-# All Rights Reserved
-#
-# This source code is distributed under the terms of the
-# GNU General Public License. See the files COPYING and LICENSE
-# for details.
-#################################################################
-
-
-### Installation points
-###
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-BINDIR = @bindir@
-MANDIR = @mandir@
-INCLUDEDIR = @includedir@
-LIBDIR = @libdir@
-SCRIPTDIR = @bindir@
-
-## your compiler and compiler flags
-#
-CC = @CC@
-CFLAGS = @CFLAGS@
-CPPFLAGS = @CPPFLAGS@
-LDFLAGS = @LDFLAGS@
-DEFS = @DEFS@
-LIBS = @LIBS@ -lm
-
-## Archiver command
-#
-AR = @AR@ rcv
-RANLIB = @RANLIB@
-
-## instructions for installing man pages
-#
-INSTMAN = cp
-MANSUFFIX = 1
-
-# Configuration for compiling in optional PVM support
-#
-PVMLIBDIR = @PVMLIBDIR@
-PVMINCDIR = @PVMINCDIR@
-PVMLIBS = @PVMLIBS@
-
-#######
-## You should not need to modify below this line
-#######
-SHELL = /bin/sh
-BASENAME = @PACKAGE_TARNAME@
-PACKAGE = @PACKAGE_NAME@
-RELEASE = @PACKAGE_VERSION@
-RELCODE = @SQUID_RELCODE@
-RELEASEDATE = "@SQUID_DATE@"
-SEEALSO = "http:\/\/www.genetics.wustl.edu\/eddy\/software\/\#squid"
-FTPDIR = /nfs/ftp/eddy/software/
-LICENSETAG = @SQUID_LICENSETAG@
-COMPRESS = gzip
-
-PROGS = afetch\
- alistat\
- compalign\
- compstruct\
- revcomp\
- seqsplit\
- seqstat\
- sfetch\
- shuffle\
- sindex\
- sreformat\
- translate\
- weight
-
-MANS = afetch\
- alistat\
- seqstat\
- sfetch\
- shuffle\
- sreformat\
-
-READMES = 00README INSTALL Makefile.in
-
-SCRIPTS =
-
-PRECONFHDRS = \
- squid.h.in\
- squidconf.h.in
-
-POSTCONFHDRS = \
- squid.h\
- squidconf.h
-
-HDRS = rk.h\
- sqfuncs.h\
- gki.h\
- gsi.h\
- msa.h\
- sre_random.h\
- ssi.h\
- stopwatch.h\
- vectorops.h
-
-OBJS = a2m.o\
- aligneval.o\
- alignio.o\
- clustal.o\
- cluster.o\
- dayhoff.o\
- eps.o\
- file.o\
- getopt.o\
- gki.o\
- gsi.o\
- hsregex.o\
- iupac.o\
- msa.o\
- msf.o\
- phylip.o\
- revcomp.o\
- rk.o\
- selex.o\
- seqencode.o\
- shuffle.o\
- sqerror.o\
- sqio.o\
- squidcore.o\
- sre_ctype.o\
- sre_math.o\
- sre_random.o\
- sre_string.o\
- ssi.o\
- stack.o\
- stockholm.o\
- stopwatch.o\
- translate.o\
- types.o\
- vectorops.o\
- weight.o
-
-################################################################
-# Targets that actually build the squid executables
-all: $(PROGS) libsquid.a
-
-$(PROGS): @EXEC_DEPENDENCY@ ${OBJS}
- ${CC} ${CFLAGS} ${LDFLAGS} ${PVMLIBDIR} ${DEFS} -o $@ $@_main.o ${OBJS} ${PVMLIBS} ${LIBS}
-
-.c.o:
- ${CC} ${CFLAGS} ${CPPFLAGS} ${PVMINCDIR} ${DEFS} -c $<
-
-################################################################
-
-
-################################################################
-# Targets expected by packages (e.g. HMMER) that
-# include SQUID as a module.
-#
-module: libsquid.a
-
-libsquid.a: $(OBJS)
- $(AR) libsquid.a $(OBJS)
- $(RANLIB) libsquid.a
- chmod 644 libsquid.a
-#################################################################
-
-
-install: $(PROGS) libsquid.a
- test -d $(LIBDIR) || mkdir -p $(LIBDIR)
- test -d $(BINDIR) || mkdir -p $(BINDIR)
- test -d $(SCRIPTDIR) || mkdir -p $(SCRIPTDIR)
- test -d $(INCLUDEDIR)|| mkdir -p $(INCLUDEDIR)
- test -d $(MANDIR)/man$(MANSUFFIX) || mkdir -p $(MANDIR)/man$(MANSUFFIX)
- cp libsquid.a $(LIBDIR)/
- cp $(HDRS) $(INCLUDEDIR)/
- cp $(POSTCONFHDRS) $(INCLUDEDIR)/
- cp $(PROGS) $(BINDIR)/
-# for scriptfile in $(SCRIPTS); do\
-# cp Scripts/$$scriptfile $(SCRIPTDIR)/;\
-# done
- @for manpage in $(MANS); do\
- $(INSTMAN) Man/$$manpage.man $(MANDIR)/man$(MANSUFFIX)/$$manpage.$(MANSUFFIX);\
- done
-
-uninstall:
- rm $(LIBDIR)/libsquid.a
- for file in $(HDRS); do\
- rm $(INCLUDEDIR)/$$file;\
- done
- for file in $(PROGS); do\
- rm $(BINDIR)/$$file;\
- done
-# for file in $(SCRIPTS); do\
-# rm $(SCRIPTDIR)/$$file;\
-# done
- for file in $(MANS); do\
- rm $(MANDIR)/man$(MANSUFFIX)/$$file.$(MANSUFFIX);\
- done
-
-check: libsquid.a
- (cd Testsuite; make CC="$(CC)" CFLAGS="$(CFLAGS)")
- (cd Testsuite; make check)
-
-distclean:
- -rm -f *.o *~ core TAGS llib-lsquid.ln ccmalloc.log $(PROGS)
- -rm -f Makefile libsquid.a ${POSTCONFHDRS}
- -rm -f config.log config.status
- -rm -rf autom4te.cache
- (cd Testsuite; make distclean)
-
-clean:
- -rm -f *.o *~ core TAGS llib-lsquid.ln ccmalloc.log $(PROGS)
- (cd Testsuite; make clean)
-
-binclean:
- (cd Testsuite; make binclean)
- -rm -f *.o *~ core TAGS llib-lsquid.ln ccmalloc.log
- -rm -f libsquid.a ${POSTCONFHDRS}
- -rm -f config.log config.status
- -rm -rf autom4te.cache
-
-# dist: build a new distribution directory in squid-$RELEASE, and make a tarball.
-# Extracts straight from the CVS repository, so you must first do
-# a "cvs commit" (it checks to be sure you do, at least for the current
-# working directory).
-dist:
-# Delete old versions of the same release
-#
- @if test -d ${BASENAME}-$(RELEASE); then rm -rf ${BASENAME}-$(RELEASE); fi
- @if test -e ${BASENAME}-$(RELEASE).tar; then rm -f ${BASENAME}-$(RELEASE).tar; fi
- @if test -e ${BASENAME}-$(RELEASE).tar.Z; then rm -f ${BASENAME}-$(RELEASE).tar.Z; fi
- @if test -e ${BASENAME}-$(RELEASE).tar.gz; then rm -f ${BASENAME}-$(RELEASE).tar.gz; fi
-#
-# CVS tag and extract. -c: make sure we committed;
-# -F: allow more than one "make dist" per rel
-# prep: must have done "cvs commit", and CVSROOT must be set
-#
- cvs tag -c -F ${RELCODE}
- cvs export -r ${RELCODE} -d ${BASENAME}-${RELEASE} ${BASENAME}
-#
-# Make the configure script from configure.ac
-#
- (cd ${BASENAME}-${RELEASE}; autoconf)
-#
-# Include the appropriate license files
-#
- cp Licenses/LICENSE.${LICENSETAG} ${BASENAME}-${RELEASE}/LICENSE
- cp Licenses/COPYRIGHT.${LICENSETAG} ${BASENAME}-${RELEASE}/COPYRIGHT
-#
-# Put license tags (short licenses) on files that need 'em (replace LICENSE keyword)
-#
- for file in $(READMES) *.c ${HDRS} ${PRECONFHDRS}; do\
- licenseadd.pl Licenses/$(LICENSETAG) ${BASENAME}-${RELEASE}/$$file;\
- done;
-#
-# Remove files/directories that aren't supposed to go out in the distro.
-# Do this last, so other steps (license adding, etc.) have simple loops.
-#
- -rm -rf ${BASENAME}-${RELEASE}/Licenses
- -rm -rf ${BASENAME}-${RELEASE}/Docs
- -rm ${BASENAME}-${RELEASE}/LOG
- -rm ${BASENAME}-${RELEASE}/configure.ac
- -rm ${BASENAME}-${RELEASE}/test_main.c
-#
-# Do replacements
-# (move this to configure)
-# for file in ${MANS}; do\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} HMMER - Biological sequence analysis with profile HMMs ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} Copyright (C) 1992-2003 Washington University School of Medicine ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} All Rights Reserved ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} This source code is distributed under the terms of the ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} GNU General Public License. See the files COPYING and LICENSE ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-# sedition2 @RELEASEDATE@ ${RELEASEDATE} @PACKAGE@ ${PACKAGE} @RELEASE@ ${RELEASE} @COPYRIGHT@ ${COPYRIGHT} for details. ${LICENSE} @SEEALSO@ ${SEEALSO} ${BASENAME}-${RELEASE}/Man/$$file.man;\
-#
- done
-#
-# Set permissions.
-#
- chmod -R ugo+rX ${BASENAME}-${RELEASE}
- chmod +x ${BASENAME}-${RELEASE}/install-sh
- chmod +x ${BASENAME}-${RELEASE}/Testsuite/bug-*
- chmod +x ${BASENAME}-${RELEASE}/Testsuite/x-base-*
-#
-# pack it up!
-#
- tar cvf ${BASENAME}-${RELEASE}.tar ${BASENAME}-${RELEASE}
- ${COMPRESS} ${BASENAME}-$(RELEASE).tar
-
-
-ftpdist:
- cp -f ${BASENAME}-${RELEASE}.tar.gz ${FTPDIR}/
- rm -f ${FTPDIR}/${BASENAME}.tar.gz
- (cd ${FTPDIR}; ln -s ${BASENAME}-${RELEASE}.tar.gz ${BASENAME}.tar.gz )
-
-
-# implode:
-# Collapse a distro version of the squid library, removing all StL-specific
-# development material.
-#
-implode:
- -rm -rf 00CHECKLIST Bugs INSTALL LOG configure.ac
- -rm -rf Docs Formats Licenses Man
-
-TAGS:
- etags -t *.h *.c Makefile.in
-
-
diff --git a/squid/Testsuite/00README b/squid/Testsuite/00README
deleted file mode 100644
index 6bfa704..0000000
--- a/squid/Testsuite/00README
+++ /dev/null
@@ -1,21 +0,0 @@
-SRE, Tue Mar 5 13:28:13 2002
-
-x-* : These are scripts (Perl or sh) that exercise the programs.
- An exercise script returns 0 on success, 1 on failure, so
- another controlling program can execute all exercise-*
- programs and count successes and failures.
- The script is also responsible for printing a message like:
- x-sindex basic sindex tests ... ok.
- x-sreformat basic sreformat tests ... ok.
- x-options-sindex sindex option tests ... FAILED
-^^^++++++++++++++++++++^xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx^... ======
- (20) (35) (6)
- Message width: 70.
- Max script name length: 20.
- Max description length: 35.
- Message: "ok." or "FAILED"
- examples:
- printf(" %-20s %-35s ... %s\n", $ARGV[0], $desc, $status);
- echo " x-sindex basic sindex tests ... ok."
-
-
diff --git a/squid/Testsuite/Makefile.in b/squid/Testsuite/Makefile.in
deleted file mode 100644
index c579ba8..0000000
--- a/squid/Testsuite/Makefile.in
+++ /dev/null
@@ -1,90 +0,0 @@
-###############################################################
-# Makefile for SQUID's testsuite
-# CVS $Id: Makefile.in,v 1.6 2003/05/23 16:16:42 eddy Exp $
-#
-# Note: The autoconf variables in this file must be coordinated
-# with HMMER, if you change them, because HMMER will
-# create a Makefile from this Makefile.in using its own
-# configure script, not SQUID's.
-#
-###########
-# HMMER - Biological sequence analysis with profile HMMs
-# Copyright (C) 1992-2003 Washington University School of Medicine
-# All Rights Reserved
-#
-# This source code is distributed under the terms of the
-# GNU General Public License. See the files COPYING and LICENSE
-# for details.
-###########
-
-SHELL = /bin/sh
-CC = @CC@
-CFLAGS = @CFLAGS@
-DEFS = @DEFS@
-LIBS = -lsquid @LIBS@ -lm
-
-TESTPROGS = iospeed rndspeed
-
-X-BASE = x-base-afetch\
- x-base-alistat\
- x-base-seqstat\
- x-base-sfetch\
- x-base-shuffle\
- x-base-sindex\
- x-base-sreformat
-
-BUGLIST = bug-1-sfetch-paths
-
-################################################################
-# Targets for building the test programs.
-#
-all: sqdconfig $(TESTPROGS)
-
-$(TESTPROGS): @EXEC_DEPENDENCY@
- $(CC) $(CFLAGS) -L../ -I../ $(DEFS) -o $@ $@_main.o $(LIBS)
-
-sqdconfig: @EXEC_DEPENDENCY@
- $(CC) $(CFLAGS) -L../ -I../ $(DEFS) -o $@ $@_main.o $(LIBS)
-
-.c.o:
- $(CC) $(CFLAGS) $(DEFS) -L../ -I../ -c $<
-
-################################################################
-
-
-################################################################
-# 'make check' actually runs the tests.
-#
-check: $(TESTPROGS) sqdconfig
- @for testprog in $(TESTPROGS); do\
- if ./$$testprog; then\
- echo $$testprog: ok;\
- else\
- echo $$testprog: FAILED;\
- fi;\
- done
- @for xprog in $(X-BASE); do\
- ./$$xprog;\
- done
- @for bugprog in $(BUGLIST); do\
- ./$$bugprog;\
- done
-
-#######
-## Miscellaneous
-#######
-
-clean:
- -rm -f *.o *~ Makefile.bak core $(TESTPROGS) TAGS gmon.out sqdconfig
-
-distclean:
- make clean
- -rm -f Makefile
-
-binclean:
- -rm -f *.o *~ Makefile.bak core TAGS gmon.out sqdconfig
-
-TAGS:
- etags -t *.c *.h Makefile.in
-
-
diff --git a/squid/Testsuite/bug-1-sfetch-paths b/squid/Testsuite/bug-1-sfetch-paths
deleted file mode 100644
index 38f5e47..0000000
--- a/squid/Testsuite/bug-1-sfetch-paths
+++ /dev/null
@@ -1,29 +0,0 @@
-#! /usr/bin/perl
-
-# Test for bug #1: sfetch/SSI path bug.
-# sfetch can't follow paths out of current directory if it's using
-# an SSI index.
-# Reported by Zhirong.
-# SRE, Wed Mar 6 21:07:47 2002
-
-use testsuite;
-
-testsuite::description("bug-1-sfetch-path", "sfetch/SSI path bug");
-$tmp = testsuite::tempname();
-$tmpdir = testsuite::tempname();
-mkdir $tmpdir, 0700;
-
-testsuite::run("../shuffle -i --dna -n 10 -t 100 > $tmp");
-testsuite::run("../sindex $tmp > /dev/null");
-testsuite::run("../sfetch -d $tmp randseq5 > /dev/null");
-
-testsuite::run("../shuffle -i --dna -n 10 -t 100 > $tmpdir/foo");
-testsuite::run("../sindex $tmpdir/foo > /dev/null");
-testsuite::run("../sfetch -d $tmpdir/foo randseq5 > /dev/null");
-
-testsuite::run("cp $tmp $tmpdir/");
-testsuite::run("cp $tmp.ssi $tmpdir/");
-testsuite::run("../sfetch -d $tmpdir/$tmp randseq5 > /dev/null");
-
-system("rm -rf $tmpdir");
-testsuite::done();
diff --git a/squid/Testsuite/iospeed_main.c b/squid/Testsuite/iospeed_main.c
deleted file mode 100644
index 13f1511..0000000
--- a/squid/Testsuite/iospeed_main.c
+++ /dev/null
@@ -1,88 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "squid.h"
-#include "stopwatch.h"
-
-#define NSEQ 40000
-#define SEQLEN 200
-
-int
-main(int argc, char **argv)
-{
- SQFILE *sqfp;
- SQINFO sqinfo;
- FILE *fp;
- char *testfile;
- char *buf;
- int buflen;
- int format = SQFILE_FASTA;
- int n = 10;
- int i;
- Stopwatch_t *w;
-
- w = StopwatchCreate();
-
- /* Create the sequence file.
- */
- testfile = tmpnam(NULL);
- if ((fp = fopen(testfile, "w")) == NULL) Die("failed to open %s", testfile);
- for (i = 0; i < NSEQ; i++)
- {
- buf = RandomSequence(AMINO_ALPHABET, aafq, 20, SEQLEN);
- WriteSimpleFASTA(fp, buf, "foo", NULL);
- free(buf);
- }
- fclose(fp);
-
- /* Timing test 1: fgets().
- */
- StopwatchStart(w);
- for (i = 0; i < n; i++) {
- if ((fp = fopen(testfile, "r")) == NULL)
- Die("iospeed failed to open %s", testfile);
- buf = malloc(sizeof(char) * 256);
- buflen = 256;
- while (fgets(buf, buflen, fp) != NULL);
- free(buf);
- fclose(fp);
- }
- StopwatchStop(w);
- StopwatchDisplay(stdout, "fgets(): \t", w);
-
- /* Timing test 2: sre_fgets()
- */
- StopwatchStart(w);
- for (i = 0; i < n; i++) {
- if ((fp = fopen(testfile,"r")) == NULL)
- Die("iospeed failed to open %s", testfile);
- buf = NULL;
- buflen = 0;
- while (sre_fgets(&buf, &buflen, fp) != NULL);
- free(buf);
- fclose(fp);
- }
- StopwatchStop(w);
- StopwatchDisplay(stdout, "sre_fgets(): \t", w);
-
- /* Timing test 3: ReadSeq()
- */
- StopwatchStart(w);
- for (i = 0; i < n; i++) {
- if ((sqfp = SeqfileOpen(testfile, format, NULL)) == NULL)
- Die("iospeed failed to open %s", testfile);
- while (ReadSeq(sqfp, sqfp->format, &buf, &sqinfo)) {
- FreeSequence(buf, &sqinfo);
- }
- SeqfileClose(sqfp);
- }
- StopwatchStop(w);
- StopwatchDisplay(stdout, "ReadSeq(): \t", w);
-
- remove(testfile);
- StopwatchFree(w);
- return(EXIT_SUCCESS);
-}
-
-
-
diff --git a/squid/Testsuite/rndspeed_main.c b/squid/Testsuite/rndspeed_main.c
deleted file mode 100644
index c1295f2..0000000
--- a/squid/Testsuite/rndspeed_main.c
+++ /dev/null
@@ -1,41 +0,0 @@
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "squid.h"
-#include "stopwatch.h"
-
-int
-main(int argc, char **argv)
-{
- int n = 1000000;
- int r1;
- float r2;
- int i;
- Stopwatch_t *w;
-
- w = StopwatchCreate();
-
- /* Timing test 1: Linux/UNIX rand().
- */
- StopwatchStart(w);
- for (i = 0; i < n; i++) {
- r1 = rand();
- }
- StopwatchStop(w);
- StopwatchDisplay(stdout, "rand(): \t", w);
-
- /* Timing test 2: sre_random()
- */
- StopwatchStart(w);
- for (i = 0; i < n; i++) {
- r2 = sre_random();
- }
- StopwatchStop(w);
- StopwatchDisplay(stdout, "sre_random(): \t", w);
-
- StopwatchFree(w);
- return(EXIT_SUCCESS);
-}
-
-
-
diff --git a/squid/Testsuite/sqdconfig_main.c b/squid/Testsuite/sqdconfig_main.c
deleted file mode 100644
index a41a32f..0000000
--- a/squid/Testsuite/sqdconfig_main.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sqdconfig_main.c
- * SRE, Tue Mar 5 15:58:27 2002 [St. Louis]
- *
- * Small C program designed to print out information on squid's
- * compile-time configuration options - testsuite scripts can
- * call this to determine what optional stuff is compiled in.
- *
- * CVS $Id: sqdconfig_main.c,v 1.1 2002/03/05 23:11:28 eddy Exp $
- */
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "squid.h"
-
-int main(void)
-{
-#ifdef HAS_64BIT_FILE_OFFSETS
- printf("%-30s true\n", "HAS_64BIT_FILE_OFFSETS");
-#else
- printf("%-30s false\n", "HAS_64BIT_FILE_OFFSETS");
-#endif
-}
diff --git a/squid/Testsuite/testsuite.pm b/squid/Testsuite/testsuite.pm
deleted file mode 100644
index f9fc8e8..0000000
--- a/squid/Testsuite/testsuite.pm
+++ /dev/null
@@ -1,44 +0,0 @@
-package testsuite;
-
-$status = 0;
-$ntmp = 0;
-$tmpbase = "sqd_test_out";
-
-sub description {
- my ($name, $desc) = @_;
- $| = 1;
- printf " %-20s %-35s ... ", $name, $desc;
- 1;
-}
-
-sub getconfig {
- my ($cfgprog, $flag) = @_;
- my $output;
- $output = `./$cfgprog`;
- if ($output =~ /$flag\s+false/) { return 0; }
- elsif ($output =~ /$flag\s+true/) { return 1; }
- else { die "$flag not found in output of $cfgprog"; }
- 1;
-}
-
-sub done {
- unlink(<$tmpbase.*>);
- if ($status == 0) { print "ok.\n"; exit(0); }
- else { print "FAILED\n"; exit($status); }
- 1;
-}
-
-sub tempname {
- my $tmp;
- $tmp = "$tmpbase.$ntmp";
- $ntmp++;
- return $tmp;
-}
-
-sub run {
- my ($cmd) = @_;
- system("$cmd 2>/dev/null"); # stderr directed away
- if ($? != 0) { $status = 1; &done(); }
- 1;
-}
-1;
diff --git a/squid/Testsuite/x-base-afetch b/squid/Testsuite/x-base-afetch
deleted file mode 100644
index 0ed6e9d..0000000
--- a/squid/Testsuite/x-base-afetch
+++ /dev/null
@@ -1,11 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-afetch", "basic tests of afetch");
-$tmp = testsuite::tempname();
-testsuite::run("cp ../Formats/stockholm.2 $tmp");
-testsuite::run("../afetch --index $tmp > /dev/null");
-testsuite::run("../afetch $tmp rrm > /dev/null");
-testsuite::run("../afetch $tmp 14-3-3 > /dev/null");
-testsuite::done();
diff --git a/squid/Testsuite/x-base-alistat b/squid/Testsuite/x-base-alistat
deleted file mode 100644
index 6b461f6..0000000
--- a/squid/Testsuite/x-base-alistat
+++ /dev/null
@@ -1,14 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-alistat", "basic tests of alistat");
-testsuite::run("../alistat ../Formats/a2m > /dev/null");
-testsuite::run("../alistat ../Formats/clustal > /dev/null");
-testsuite::run("../alistat ../Formats/msf > /dev/null");
-testsuite::run("../alistat ../Formats/phylip > /dev/null");
-testsuite::run("../alistat ../Formats/selex.1 > /dev/null");
-testsuite::run("../alistat ../Formats/selex.2 > /dev/null");
-testsuite::run("../alistat ../Formats/stockholm.1 > /dev/null");
-testsuite::run("../alistat ../Formats/stockholm.2 > /dev/null");
-testsuite::done();
diff --git a/squid/Testsuite/x-base-seqstat b/squid/Testsuite/x-base-seqstat
deleted file mode 100644
index b7cd7af..0000000
--- a/squid/Testsuite/x-base-seqstat
+++ /dev/null
@@ -1,22 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-seqstat", "basic tests of seqstat");
-testsuite::run("../seqstat ../Formats/a2m > /dev/null");
-testsuite::run("../seqstat ../Formats/clustal > /dev/null");
-testsuite::run("../seqstat ../Formats/embl > /dev/null");
-testsuite::run("../seqstat ../Formats/fasta > /dev/null");
-testsuite::run("../seqstat ../Formats/gcg > /dev/null");
-testsuite::run("../seqstat ../Formats/gcgdata.1 > /dev/null");
-testsuite::run("../seqstat ../Formats/gcgdata.2 > /dev/null");
-testsuite::run("../seqstat ../Formats/genbank > /dev/null");
-testsuite::run("../seqstat ../Formats/msf > /dev/null");
-testsuite::run("../seqstat ../Formats/phylip > /dev/null");
-testsuite::run("../seqstat ../Formats/pir > /dev/null");
-testsuite::run("../seqstat ../Formats/selex.1 > /dev/null");
-testsuite::run("../seqstat ../Formats/selex.2 > /dev/null");
-testsuite::run("../seqstat ../Formats/stockholm.1 > /dev/null");
-testsuite::run("../seqstat ../Formats/stockholm.2 > /dev/null");
-testsuite::run("../seqstat ../Formats/swissprot > /dev/null");
-testsuite::done();
diff --git a/squid/Testsuite/x-base-sfetch b/squid/Testsuite/x-base-sfetch
deleted file mode 100644
index 73a9a10..0000000
--- a/squid/Testsuite/x-base-sfetch
+++ /dev/null
@@ -1,9 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-sfetch", "basic tests of sfetch");
-testsuite::run("../sfetch -d ../Formats/fasta AC3.1 > /dev/null");
-testsuite::run("../sfetch -d ../Formats/fasta AC3.2 > /dev/null");
-testsuite::run("../sfetch -d ../Formats/fasta AC3.3 > /dev/null");
-testsuite::done();
diff --git a/squid/Testsuite/x-base-shuffle b/squid/Testsuite/x-base-shuffle
deleted file mode 100644
index 4f9b4e2..0000000
--- a/squid/Testsuite/x-base-shuffle
+++ /dev/null
@@ -1,7 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-shuffle", "basic tests of shuffle");
-testsuite::run("../shuffle ../Formats/fasta > /dev/null");
-testsuite::done();
diff --git a/squid/Testsuite/x-base-sindex b/squid/Testsuite/x-base-sindex
deleted file mode 100644
index c3d689d..0000000
--- a/squid/Testsuite/x-base-sindex
+++ /dev/null
@@ -1,21 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-sindex", "basic tests of sindex");
-$tmp = testsuite::tempname();
-testsuite::run("../shuffle -i --dna -n 10 -t 100 > $tmp");
-testsuite::run("../sindex $tmp > /dev/null");
-testsuite::run("../sindex --external $tmp > /dev/null");
-
-$largefile = testsuite::getconfig("sqdconfig", "HAS_64BIT_FILE_OFFSETS");
-if ($largefile) {
- testsuite::run("../sindex --64 $tmp > /dev/null");
- testsuite::run("../sindex --64 --external $tmp > /dev/null");
-}
-
-testsuite::done();
-
-
-
-
diff --git a/squid/Testsuite/x-base-sreformat b/squid/Testsuite/x-base-sreformat
deleted file mode 100644
index ca4a162..0000000
--- a/squid/Testsuite/x-base-sreformat
+++ /dev/null
@@ -1,13 +0,0 @@
-#! /usr/bin/perl
-
-use testsuite;
-
-testsuite::description("x-base-sreformat", "basic tests of sreformat");
-testsuite::run("../sreformat fasta ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat embl ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat genbank ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat gcg ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat gcgdata ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat pir ../Formats/fasta > /dev/null");
-testsuite::run("../sreformat raw ../Formats/fasta > /dev/null");
-testsuite::done();
diff --git a/squid/a2m.c b/squid/a2m.c
deleted file mode 100644
index e0a9690..0000000
--- a/squid/a2m.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* a2m.c
- *
- * reading/writing A2M (aligned FASTA) files.
- *
- * CVS $Id: a2m.c,v 1.2 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-
-/* Function: ReadA2M()
- * Date: SRE, Sun Jun 6 17:11:29 1999 [bus from Madison 1999 worm mtg]
- *
- * Purpose: Parse an alignment read from an open A2M format
- * alignment file. A2M is a single alignment format.
- * Return the alignment, or NULL if we've already
- * read the alignment.
- *
- * Args: afp - open alignment file
- *
- * Returns: MSA * - an alignment object.
- * Caller responsible for an MSAFree()
- */
-MSA *
-ReadA2M(MSAFILE *afp)
-{
- MSA *msa;
- char *buf;
- char *name;
- char *desc;
- char *seq;
- int idx;
- int len1, len2;
-
- if (feof(afp->f)) return NULL;
-
- name = NULL;
- msa = MSAAlloc(10, 0);
- idx = 0;
- while ((buf = MSAFileGetLine(afp)) != NULL)
- {
- if (*buf == '>')
- {
- buf++; /* skip the '>' */
- if ((name = sre_strtok(&buf, WHITESPACE, &len1)) == NULL)
- Die("Blank name in A2M file %s (line %d)\n", afp->fname, afp->linenumber);
- desc = sre_strtok(&buf, "\n", &len2);
-
- idx = GKIStoreKey(msa->index, name);
- if (idx >= msa->nseqalloc) MSAExpand(msa);
-
- msa->sqname[idx] = sre_strdup(name, len1);
- if (desc != NULL) MSASetSeqDescription(msa, idx, desc);
- msa->nseq++;
- }
- else if (name != NULL)
- {
- if ((seq = sre_strtok(&buf, WHITESPACE, &len1)) == NULL) continue;
- msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], seq, len1);
- }
- }
- if (name == NULL) { MSAFree(msa); return NULL; }
-
- MSAVerifyParse(msa);
- return msa;
-}
-
-
-/* Function: WriteA2M()
- * Date: SRE, Sun Jun 6 17:40:35 1999 [bus from Madison, 1999 worm mtg]
- *
- * Purpose: Write an "aligned FASTA" (aka a2m, to UCSC) formatted
- * alignment.
- *
- * Args: fp - open FILE to write to.
- * msa - alignment to write
- *
- * Returns: void
- */
-void
-WriteA2M(FILE *fp, MSA *msa)
-{
- int idx; /* sequence index */
- int pos; /* position in sequence */
- char buf[64]; /* buffer for individual lines */
- int cpl = 60; /* char per line; must be < 64 unless buf is bigger */
-
- buf[cpl] = '\0';
- for (idx = 0; idx < msa->nseq; idx++)
- {
- fprintf(fp, ">%s %s\n",
- msa->sqname[idx],
- (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL) ? msa->sqdesc[idx] : "");
- for (pos = 0; pos < msa->alen; pos+=cpl)
- {
- strncpy(buf, &(msa->aseq[idx][pos]), cpl);
- fprintf(fp, "%s\n", buf);
- }
- }
-}
diff --git a/squid/afetch_main.c b/squid/afetch_main.c
deleted file mode 100644
index 3bc5c6b..0000000
--- a/squid/afetch_main.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* afetch_main.c
- * SRE, Tue Nov 9 18:47:02 1999 [Saint Louis]
- *
- * afetch -- a program to extract alignments from the Pfam database
- *
- * CVS $Id: afetch_main.c,v 1.6 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-#include "ssi.h"
-
-/* One-line program description; main() hands it to SqdBanner() for -h. */
-static char banner[] = "afetch - retrieve an alignment from Pfam";
-
-/* Usage text: printed for -h, passed to Getopt(), and appended to the
- * "Incorrect number of command line arguments" error in main().
- */
-static char usage[] = "\
-Usage: afetch [-options] <alignment database> <name or accession>\n\
- or: afetch --index <alignment database>\n\
-\n\
- Get an alignment from a database.\n\
- Available options:\n\
- -h : help; print version and usage info\n\
-";
-
-/* Additional option help; main() prints it right after usage for -h. */
-static char experts[] = "\
- --index : construct indices for the database\n\
-";
-
-/* Option table passed to Getopt() in main().
- * NOTE(review): the second field presumably marks single-character
- * options (TRUE for "-h", FALSE for "--index") -- confirm against the
- * declaration of struct opt_s.
- */
-static struct opt_s OPTIONS[] = {
- { "-h", TRUE, sqdARG_NONE },
- { "--index", FALSE, sqdARG_NONE }
-};
-/* Number of entries in OPTIONS. */
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-/* afetch entry point.
- *
- * Two modes, selected on the command line:
- *   afetch --index <db>     build "<db>.ssi", keyed on each alignment's
- *                           name (primary) and accession (secondary)
- *   afetch <db> <key>       write the alignment whose name or accession
- *                           equals <key> to stdout in Stockholm format
- *
- * Retrieval goes through MSAFilePositionByKey() when afp->ssi is set,
- * and otherwise scans the file sequentially. Errors are reported with Die().
- */
-int
-main(int argc, char **argv)
-{
-  char *afile; /* name of alignment file to read */
-  MSAFILE *afp; /* pointer to open index file */
-  char *key; /* name/accession of alignment to fetch */
-  MSA *msa; /* the fetched alignment */
-  int format; /* format of afile */
-  int do_index; /* TRUE to index instead of retrieve */
-
-  char *optname;
-  char *optarg;
-  int optind;
-
-  /***********************************************
-   * Parse the command line
-   ***********************************************/
-
-  /* initializations and defaults */
-  format = MSAFILE_STOCKHOLM; /* period. It's the only multi-MSA file format. */
-  do_index = FALSE;
-  key = NULL;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-                &optind, &optname, &optarg))
-    {
-      if (strcmp(optname, "--index") == 0) { do_index = TRUE; }
-      else if (strcmp(optname, "-h") == 0) {
-        SqdBanner(stdout, banner);
-        puts(usage);
-        puts(experts);
-        exit(EXIT_SUCCESS);
-      }
-    }
-
-  /* Indexing takes one argument (the file); retrieval takes two (file, key). */
-  if ((do_index && argc - optind != 1) || (! do_index && argc - optind != 2))
-    Die("Incorrect number of command line arguments.\n%s\n", usage);
-
-  afile = argv[optind++];
-  if (! do_index) key = argv[optind++];
-
-  if ((afp = MSAFileOpen(afile, format, NULL)) == NULL)
-    Die("Alignment file %s could not be opened for reading", afile);
-
-  /***********************************************
-   * Section 1. Alignment database indexing
-   ***********************************************/
-
-  if (do_index) {
-    int mode;
-    char *ssifile;
-    SSIINDEX *si;
-    int fh;
-    int status;
-    SSIOFFSET offset;
-    int n = 0;
-
-    /* Not that we're expecting an alignment file so
-     * large that it would require a 64-bit index, but...
-     */
-    if ((mode = SSIRecommendMode(afile)) == -1)
-      Die("File %s doesn't exist, or is too large for your OS", afile);
-
-    ssifile = sre_strdup(afile, -1);
-    sre_strcat(&ssifile, -1, ".ssi", -1);
-
-    if ((si = SSICreateIndex(mode)) == NULL)
-      Die("Couldn't allocate/initialize the new SSI index");
-    if (SSIAddFileToIndex(si, afile, afp->format, &fh) != 0)
-      Die("SSIAddFileToIndex() failed");
-
-    /* The offset is sampled before each MSAFileRead(), so it refers to
-     * the start of the alignment that the read then returns.
-     */
-    status = SSIGetFilePosition(afp->f, mode, &offset);
-    if (status != 0) Die("SSIGetFilePosition() failed");
-
-    while ((msa = MSAFileRead(afp)) != NULL)
-      {
-        if (msa->name == NULL)
-          Die("SSI index requires that every MSA has a name");
-
-        status = SSIAddPrimaryKeyToIndex(si, msa->name, fh, &offset, NULL, 0);
-        if (status != 0) Die("SSIAddPrimaryKeyToIndex() failed");
-
-        if (msa->acc != NULL) {
-          status = SSIAddSecondaryKeyToIndex(si, msa->acc, msa->name);
-          if (status != 0) Die("SSIAddSecondaryKeyToIndex() failed");
-        }
-
-        status = SSIGetFilePosition(afp->f, mode, &offset);
-        if (status != 0) Die("SSIGetFilePosition() failed");
-
-        n++;
-        MSAFree(msa);
-      }
-
-    status = SSIWriteIndex(ssifile, si);
-    if (status != 0) Die("SSIWriteIndex() failed");
-
-    printf ("%d alignments indexed in SSI index %s\n", n, ssifile);
-    free(ssifile);
-    MSAFileClose(afp);
-    SSIFreeIndex(si);
-    SqdClean();
-    exit (0); /* exit indexing program here */
-  }
-
-  /***********************************************
-   * Section 2. Alignment retrieval
-   ***********************************************/
-
-  /* Indexed retrieval:
-   */
-  if (afp->ssi != NULL) {
-    if (! MSAFilePositionByKey(afp, key))
-      Die("No such alignment %s found in file %s", key, afile);
-    msa = MSAFileRead(afp);
-  }
-  /* Brute force retrieval:
-   */
-  else {
-    while ((msa = MSAFileRead(afp)) != NULL)
-      {
-        /* Name and accession are both optional in an MSA (the indexing
-         * code above checks each for NULL), so guard before strcmp().
-         */
-        if (msa->name != NULL && strcmp(msa->name, key) == 0) break;
-        if (msa->acc != NULL && strcmp(msa->acc, key) == 0) break;
-        MSAFree(msa);
-      }
-  }
-
-  if (msa == NULL) Die("Failed to retrieve %s from file %s", key, afile);
-
-  /* Output the alignment we retrieved
-   */
-  WriteStockholm(stdout, msa);
-
-  MSAFileClose(afp);
-  MSAFree(msa);
-  exit (0);
-}
diff --git a/squid/aligneval.c b/squid/aligneval.c
deleted file mode 100644
index 1b88f3f..0000000
--- a/squid/aligneval.c
+++ /dev/null
@@ -1,510 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* aligneval.c
- *
- * Comparison of multiple alignments. Two functions are
- * provided, using subtly different scoring schemes:
- * CompareMultAlignments() - basic scoring scheme
- * CompareRefMultAlignments() - only certain "canonical" columns
- * are scored
- *
- * The similarity measure is a fractional alignment identity averaged
- * over all sequence pairs. The score for all pairs is:
- * (identically aligned symbols) / (total aligned columns in
- * known alignment)
- *
- * A column c is identically aligned for sequences i, j if:
- * 1) both i,j have a symbol aligned in column c, and the
- * same pair of symbols is aligned somewhere in the test
- * alignment
- * 2) S[i][c] is aligned to a gap in sequence j, and that symbol
- * is aligned to a gap in the test alignment
- * 3) converse of 2)
- *
- *
- * The algorithm is as follows:
- * 1) For each known/test aligned pair of sequences (k1,k2 and t1,t2)
- * construct a list for each sequence, in which for every
- * counted symbol we record the raw index of the symbol in
- * the other sequence that it aligns to, or -1 if it aligns
- * to a gap or uncounted symbol.
- *
- * 2) Compare the list for k1 to the list for t1 and count an identity
- * for each correct alignment.
- *
- * 3) Repeat 2) for comparing k2 to t2. Note that this means correct sym/sym
- * alignments count for 2; correct sym/gap alignments count for 1.
- *
- * 4) The score is (identities from 2 + identities from 3) /
- * (totals from 2 + totals from 3).
- *
- * Written originally for koala's ss2 pairwise alignment package.
- *
- * Sean Eddy, Sun Nov 1 12:45:11 1992
- * SRE, Thu Jul 29 16:47:18 1993: major revision: all functions replaced by new algorithm
- * CVS $Id: aligneval.c,v 1.9 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-#include "sre_random.h"
-
-static int make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen);
-static int make_ref_alilist(int *refcoords, char *k1, char *k2, char *s1, char *s2,
- int **ret_s1_list, int *ret_listlen);
-static int compare_lists(int *k1, int *k2, int *t1, int *t2, int len1, int len2, float *ret_sc);
-
-
-/* Function: ComparePairAlignments
- *
- * Purpose: Calculate and return a number representing how well two different alignments
- * of a pair of sequences compare. The number is, roughly speaking,
- * the fraction of columns which are identically aligned.
- *
- * For all columns c in which either known1[c] or known2[c]
- * is a non-gap, count an identity if those same symbols are
- * aligned somewhere in calc1/calc2. The score is identities/total
- * columns examined. (i.e. fully gapped columns don't count)
- *
- * more explicitly, identities come from:
- * both known and test aligned pairs have the same symbol in the first sequence aligned to
- * a gap in the second sequence;
- * both known and test aligned pairs have the same symbol in the second sequence
- * aligned to a gap in the first sequence;
- * the known alignment has symbols aligned at this column, and the test
- * alignment aligns the same two symbols.
- *
- * Args: known1, known2: trusted alignment of two sequences
- * calc1, calc2: test alignment of two sequences
- *
- * Return: Returns -1.0 on internal failure.
- */
-float
-ComparePairAlignments(char *known1, char *known2, char *calc1, char *calc2)
-{
-  int *klist1 = NULL;           /* known1 -> known2 mapping */
-  int *klist2 = NULL;           /* known2 -> known1 mapping */
-  int *tlist1 = NULL;           /* calc1  -> calc2  mapping */
-  int *tlist2 = NULL;           /* calc2  -> calc1  mapping */
-  int klen1, klen2;             /* lengths of the known lists */
-  int tlen1, tlen2;             /* lengths of the test lists  */
-  float score = -1.0;           /* stays -1.0 unless the comparison succeeds */
-
-  if (! make_alilist(calc1, calc2, &tlist1, &tlen1)) goto done;
-  if (! make_alilist(calc2, calc1, &tlist2, &tlen2)) goto done;
-  if (! make_alilist(known1, known2, &klist1, &klen1)) goto done;
-  if (! make_alilist(known2, known1, &klist2, &klen2)) goto done;
-
-  /* compare_lists() walks the known and test lists in parallel, so they
-   * must be the same length. They differ only if the two alignments do
-   * not contain the same raw sequences; treat that as a failure rather
-   * than read past the end of the shorter list.
-   */
-  if (klen1 != tlen1 || klen2 != tlen2) goto done;
-
-  if (! compare_lists(klist1, klist2, tlist1, tlist2, klen1, klen2, &score))
-    score = -1.0;
-
- done:
-  /* Single exit so the lists are released on failure paths too. */
-  free(klist1);
-  free(klist2);
-  free(tlist1);
-  free(tlist2);
-  return score;
-}
-
-
-
-/* Function: CompareRefPairAlignments()
- *
- * Same as above, but the only columns that count are the ones
- * with indices in *refcoord. *refcoord and the known1, known2
- * pair must be in sync with each other (come from the same
- * multiple sequence alignment)
- *
- * Args: ref - 0..alen-1 array of 1 or 0
- * known1,known2 - trusted alignment
- * calc1, calc2 - test alignment
- *
- * Return: the fractional alignment identity on success, -1.0 on failure.
- */
-float
-CompareRefPairAlignments(int *ref, char *known1, char *known2, char *calc1, char *calc2)
-{
-  int *klist1 = NULL;           /* known1 -> known2 mapping, canonical symbols only */
-  int *klist2 = NULL;           /* known2 -> known1 mapping, canonical symbols only */
-  int *tlist1 = NULL;           /* calc1  -> calc2  mapping, canonical symbols only */
-  int *tlist2 = NULL;           /* calc2  -> calc1  mapping, canonical symbols only */
-  int klen1, klen2;             /* lengths of the known lists */
-  int tlen1, tlen2;             /* lengths of the test lists  */
-  float score = -1.0;           /* stays -1.0 unless the comparison succeeds */
-
-  if (! make_ref_alilist(ref, known1, known2, calc1, calc2, &tlist1, &tlen1)) goto done;
-  if (! make_ref_alilist(ref, known2, known1, calc2, calc1, &tlist2, &tlen2)) goto done;
-  if (! make_ref_alilist(ref, known1, known2, known1, known2, &klist1, &klen1)) goto done;
-  if (! make_ref_alilist(ref, known2, known1, known2, known1, &klist2, &klen2)) goto done;
-
-  /* compare_lists() walks the known and test lists in parallel, so they
-   * must be the same length; a mismatch means the alignments do not hold
-   * the same raw sequences. Fail instead of reading out of bounds.
-   */
-  if (klen1 != tlen1 || klen2 != tlen2) goto done;
-
-  if (! compare_lists(klist1, klist2, tlist1, tlist2, klen1, klen2, &score))
-    score = -1.0;
-
- done:
-  /* Single exit so the lists are released on failure paths too. */
-  free(klist1);
-  free(klist2);
-  free(tlist1);
-  free(tlist2);
-  return score;
-}
-
-/* Function: make_alilist()
- *
- * Purpose: Construct a list (array) mapping the raw symbols of s1
- * onto the indexes of the aligned symbols in s2 (or -1
- * for gaps in s2). The list (s1_list) will be of the
- * length of s1's raw sequence.
- *
- * Args: s1 - sequence to construct the list for
- * s2 - sequence s1 is aligned to
- * ret_s1_list - RETURN: the constructed list (caller must free)
- * ret_listlen - RETURN: length of the list
- *
- * Returns: 1 on success, 0 on failure
- */
-static int
-make_alilist(char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
-{
-  int *map;                     /* map[i] = raw index in s2 aligned to raw symbol i of s1, or -1 */
-  int  apos;                    /* aligned column */
-  int  n1 = 0;                  /* raw symbols of s1 seen so far */
-  int  n2 = 0;                  /* raw symbols of s2 seen so far */
-  int  s2_gapped;               /* is s2 a gap in this column? */
-
-  /* One slot per aligned column of s1 is always enough, since the raw
-   * sequence cannot be longer than the aligned one.
-   */
-  map = (int *) MallocOrDie (sizeof(int) * strlen(s1));
-
-  for (apos = 0; s1[apos] != '\0'; apos++)
-    {
-      s2_gapped = isgap(s2[apos]);
-
-      /* A symbol in s1 gets an entry: its partner's raw index, or -1
-       * when it sits opposite a gap.
-       */
-      if (! isgap(s1[apos]))
-        map[n1++] = s2_gapped ? -1 : n2;
-
-      /* A symbol in s2 advances s2's raw coordinate. */
-      if (! s2_gapped)
-        n2++;
-    }
-
-  *ret_s1_list = map;
-  *ret_listlen = n1;
-  return 1;
-}
-
-
-
-/* Function: make_ref_alilist()
- *
- * Purpose: Construct a list (array) mapping the raw symbols of s1
- * which are under canonical columns of the ref alignment
- * onto the indexes of the aligned symbols in s2 (or -1
- * for gaps in s2 or noncanonical symbols in s2).
- *
- * Args: ref: - array of indices of canonical coords (1 canonical, 0 non)
- * k1 - s1's known alignment (w/ respect to refcoords)
- * k2 - s2's known alignment (w/ respect to refcoords)
- * s1 - sequence to construct the list for
- * s2 - sequence s1 is aligned to
- * ret_s1_list - RETURN: the constructed list (caller must free)
- * ret_listlen - RETURN: length of the list
- *
- * Returns: 1 on success, 0 on failure
- */
-/*ARGSUSED*/
-static int
-make_ref_alilist(int *ref, char *k1, char *k2,
-                 char *s1, char *s2, int **ret_s1_list, int *ret_listlen)
-{
-  int *map;                     /* the list being built */
-  int *is_canon;                /* is_canon[i] is 1 if raw symbol i of s1 is under a canonical column */
-  int  apos;                    /* aligned column */
-  int  raw1, raw2;              /* raw symbol index at the current column in s1, s2 */
-  int  nlist;                   /* entries written to map so far */
-  int  s1_sym, s2_sym;          /* does s1 / s2 have a symbol in this column? */
-
-  /* Both arrays are sized by the aligned length of s1. k2 is not used. */
-  map      = (int *) MallocOrDie (sizeof(int) * strlen(s1));
-  is_canon = (int *) MallocOrDie (sizeof(int) * strlen(s1));
-
-  /* Pass 1: walk the known alignment k1 and record, for each raw symbol,
-   * whether the column it sits in is flagged in ref.
-   */
-  raw1 = 0;
-  for (apos = 0; k1[apos] != '\0'; apos++)
-    if (! isgap(k1[apos]))
-      is_canon[raw1++] = (ref[apos] != 0);
-
-  /* Pass 2: walk s1/s2 and emit one list entry per canonical symbol of
-   * s1. The list position (nlist) advances separately from the raw
-   * coordinates, because non-canonical symbols are skipped.
-   */
-  raw1 = 0;
-  raw2 = 0;
-  nlist = 0;
-  for (apos = 0; s1[apos] != '\0'; apos++)
-    {
-      s1_sym = ! isgap(s1[apos]);
-      s2_sym = ! isgap(s2[apos]);
-
-      if (s1_sym && is_canon[raw1])
-        map[nlist++] = s2_sym ? raw2 : -1;
-
-      if (s1_sym) raw1++;
-      if (s2_sym) raw2++;
-    }
-
-  free(is_canon);
-  *ret_s1_list = map;
-  *ret_listlen = nlist;
-  return 1;
-}
-
-/* Function: compare_lists()
- *
- * Purpose: Given four alignment lists (k1,k2, t1,t2), calculate the
- * alignment score.
- *
- * Args: k1 - list of k1's alignment to k2
- * k2 - list of k2's alignment to k1
- * t1 - list of t1's alignment to t2
- * t2 - list of t2's alignment to t1
- * len1 - length of k1, t1 lists (same by definition)
- * len2 - length of k2, t2 lists (same by definition)
- * ret_sc - RETURN: identity score of alignment
- *
- * Return: 1 on success, 0 on failure.
- */
-static int
-compare_lists(int *k1, int *k2, int *t1, int *t2, int len1, int len2, float *ret_sc)
-{
-  int nident = 0;               /* list positions where known and test agree */
-  int ntotal = 0;               /* list positions examined */
-  int i;
-
-  /* Sequence 1: known vs. test mapping, position by position. */
-  for (i = 0; i < len1; i++)
-    {
-      ntotal++;
-      if (t1[i] == k1[i]) nident++;
-    }
-
-  /* Sequence 2, likewise. */
-  for (i = 0; i < len2; i++)
-    {
-      ntotal++;
-      if (k2[i] == t2[i]) nident++;
-    }
-
-  /* With nothing to compare, 0/0 would yield NaN, which callers'
-   * "score < 0.0" failure test cannot catch. Report 0.0 instead, the
-   * same convention PairwiseIdentity() uses for empty input.
-   */
-  if (ntotal == 0)
-    *ret_sc = 0.0;
-  else
-    *ret_sc = (float) nident / (float) ntotal;
-  return 1;
-}
-
-
-/* Function: CompareMultAlignments
- *
- * Purpose: Invokes pairwise alignment comparison for every possible pair,
- * and returns the average score over all N(N-1)/2 of them or -1.0
- * on an internal failure.
- *
- * Can be slow for large N, since it's quadratic.
- *
- * Args: kseqs - trusted multiple alignment
- * tseqs - test multiple alignment
- * N - number of sequences
- *
- * Return: average identity score, or -1.0 on failure.
- */
-float
-CompareMultAlignments(char **kseqs, char **tseqs, int N)
-{
-  int i, j; /* counters for sequences */
-  float score;
-  float tot_score = 0.0;
-
-  /* Fewer than two sequences means no pairs: the average below would be
-   * 0/0 (NaN). Report it through the existing failure value instead.
-   */
-  if (N < 2) return -1.0;
-
-  /* do all pairwise comparisons */
-  for (i = 0; i < N; i++)
-    for (j = i+1; j < N; j++)
-      {
-        score = ComparePairAlignments(kseqs[i], kseqs[j], tseqs[i], tseqs[j]);
-        if (score < 0.0) return -1.0;
-        tot_score += score;
-      }
-
-  /* N(N-1)/2 pairs were scored; divide the sum by that count. */
-  return ((tot_score * 2.0) / ((float) N * ((float) N - 1.0)));
-}
-
-
-
-/* Function: CompareRefMultAlignments()
- *
- * Purpose: Same as above, except an array of reference coords for
- * the canonical positions of the known alignment is also
- * provided.
- *
- * Args: ref : 0..alen-1 array of 1/0 flags, 1 if canon
- * kseqs : trusted alignment
- * tseqs : test alignment
- * N : number of sequences
- *
- * Return: average identity score, or -1.0 on failure
- */
-float
-CompareRefMultAlignments(int *ref, char **kseqs, char **tseqs, int N)
-{
-  int i, j; /* counters for sequences */
-  float score;
-  float tot_score = 0.0;
-
-  /* Fewer than two sequences means no pairs: the average below would be
-   * 0/0 (NaN). Report it through the existing failure value instead.
-   */
-  if (N < 2) return -1.0;
-
-  /* do all pairwise comparisons */
-  for (i = 0; i < N; i++)
-    for (j = i+1; j < N; j++)
-      {
-        score = CompareRefPairAlignments(ref, kseqs[i], kseqs[j], tseqs[i], tseqs[j]);
-        if (score < 0.0) return -1.0;
-        tot_score += score;
-      }
-
-  /* N(N-1)/2 pairs were scored; divide the sum by that count. */
-  return ((tot_score * 2.0)/ ((float) N * ((float) N - 1.0)));
-}
-
-/* Function: PairwiseIdentity()
- *
- * Purpose: Calculate the pairwise fractional identity between
- * two aligned sequences s1 and s2. This is simply
- * (idents / MIN(len1, len2)).
- *
- * Note how many ways there are to calculate pairwise identity,
- * because of the variety of choices for the denominator:
- * idents/(idents+mismat) has the disadvantage that artifactual
- * gappy alignments would have high "identities".
- * idents/(AVG|MAX)(len1,len2) both have the disadvantage that
- * alignments of fragments to longer sequences would have
- * artifactually low "identities".
- *
- * Case sensitive; also, watch out in nucleic acid alignments;
- * U/T RNA/DNA alignments will be counted as mismatches!
- */
-float
-PairwiseIdentity(char *s1, char *s2)
-{
-  int nmatch = 0;               /* columns where s1 has a symbol and s2 holds the same character */
-  int res1   = 0;               /* non-gap symbols counted in s1 */
-  int res2   = 0;               /* non-gap symbols counted in s2 */
-  int shorter;                  /* MIN(res1, res2): the denominator */
-
-  /* Walk both strings in lockstep, stopping at whichever ends first. */
-  for (; *s1 != '\0' && *s2 != '\0'; s1++, s2++)
-    {
-      if (! isgap(*s1))
-        {
-          res1++;
-          if (*s1 == *s2) nmatch++;
-        }
-      if (! isgap(*s2)) res2++;
-    }
-
-  shorter = (res2 < res1) ? res2 : res1;
-
-  /* No symbols at all in one of the sequences: define identity as 0. */
-  if (shorter == 0) return 0.0;
-  return (float) nmatch / (float) shorter;
-}
-
-
-
-/* Function: AlignmentIdentityBySampling()
- * Date: SRE, Mon Oct 19 14:29:01 1998 [St. Louis]
- *
- * Purpose: Estimate and return the average pairwise
- * fractional identity of an alignment,
- * using sampling.
- *
- * For use when there's so many sequences that
- * an all vs. all rigorous calculation will
- * take too long.
- *
- * Case sensitive!
- *
- * Args: aseq - aligned sequences
- * L - length of alignment
- * N - number of seqs in alignment
- * nsample - number of samples
- *
- * Returns: average fractional identity, 0..1.
- */
-float
-AlignmentIdentityBySampling(char **aseq, int L, int N, int nsample)
-{
-  float total = 0.;             /* running sum of sampled pairwise identities */
-  int   drawn;                  /* samples taken so far */
-  int   a, b;                   /* indices of the sampled pair */
-
-  /* No pair can be drawn from fewer than two sequences. (L is unused.) */
-  if (N < 2) return 1.0;
-
-  for (drawn = 0; drawn < nsample; drawn++)
-    {
-      a = CHOOSE(N);
-      b = CHOOSE(N);
-      while (b == a)            /* redraw until the two indices differ */
-        b = CHOOSE(N);
-      total += PairwiseIdentity(aseq[a], aseq[b]);
-    }
-  return total / (float) nsample;
-}
-
-/* Function: MajorityRuleConsensus()
- * Date: SRE, Tue Mar 7 15:30:30 2000 [St. Louis]
- *
- * Purpose: Given a set of aligned sequences, produce a
- * majority rule consensus sequence. If >50% nonalphabetic
- * (usually meaning gaps) in the column, ignore the column.
- *
- * Args: aseq - aligned sequences, [0..nseq-1][0..alen-1]
- * nseq - number of sequences
- * alen - length of alignment
- *
- * Returns: ptr to allocated consensus sequence.
- * Caller is responsible for free'ing this.
- */
-char *
-MajorityRuleConsensus(char **aseq, int nseq, int alen)
-{
-  char *cs; /* RETURN: consensus sequence */
-  int count[27]; /* counts for a..z and gaps in a column */
-  int idx,apos; /* counters for seq, column */
-  int spos; /* position in cs */
-  int x; /* counter for characters */
-  int sym;
-  int max, bestx;
-
-  cs = MallocOrDie(sizeof(char) * (alen+1));
-
-  for (spos=0,apos=0; apos < alen; apos++)
-    {
-      for (x = 0; x < 27; x++) count[x] = 0;
-
-      for (idx = 0; idx < nseq; idx++)
-        {
-          /* <ctype.h> functions need a value representable as unsigned
-           * char; a plain char may be negative. The explicit 'A'..'Z'
-           * range test (instead of isalpha()) also keeps count[] in
-           * bounds if the locale classifies other bytes as letters.
-           */
-          sym = toupper((unsigned char) aseq[idx][apos]);
-          if (sym >= 'A' && sym <= 'Z') {
-            count[sym-'A']++;
-          } else {
-            count[26]++;
-          }
-        }
-
-      /* Emit a consensus symbol only when at most half the column is
-       * non-alphabetic; the first letter with the highest count wins.
-       */
-      if ((float) count[26] / (float) nseq <= 0.5) {
-        max = bestx = -1;
-        for (x = 0; x < 26; x++)
-          if (count[x] > max) { max = count[x]; bestx = x; }
-        cs[spos++] = (char) ('A' + bestx);
-      }
-    }
-  cs[spos] = '\0';
-  return cs;
-}
diff --git a/squid/alignio.c b/squid/alignio.c
deleted file mode 100644
index 32f3612..0000000
--- a/squid/alignio.c
+++ /dev/null
@@ -1,642 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* alignio.c
- * SRE, Mon Jul 12 11:57:37 1993
- * CVS $Id: alignio.c,v 1.12 2003/04/14 16:00:16 eddy Exp $
- *
- * Input/output of sequence alignments.
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-#include "sre_random.h"
-
-/* Function: AllocAlignment()
- *
- * Purpose: Allocate space for an alignment, given the number
- * of sequences and the alignment length in columns.
- *
- * Args: nseq - number of sequences
- * alen - width of alignment
- * ret_aseq - RETURN: alignment itself
- * ainfo - RETURN: other info associated with alignment
- *
- * Return: (void)
- * aseq, ainfo free'd by caller: FreeAlignment(aseq, &ainfo).
- * note that ainfo itself is alloc'ed in caller, usually
- * just by a "AINFO ainfo" definition.
- */
-void
-AllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo)
-{
-  char **rows;                  /* the alignment: one string per sequence */
-  int    i;
-
-  /* Reset the optional fields before filling in the sized ones. */
-  InitAinfo(ainfo);
-
-  /* Each row holds alen columns plus room for a terminator. */
-  rows = (char **) MallocOrDie (sizeof(char *) * nseq);
-  i = 0;
-  while (i < nseq)
-    {
-      rows[i] = (char *) MallocOrDie (sizeof(char) * (alen+1));
-      i++;
-    }
-
-  ainfo->alen = alen;
-  ainfo->nseq = nseq;
-
-  /* Per-sequence weights, set via FSet() with value 1.0. */
-  ainfo->wgt = (float *) MallocOrDie (sizeof(float) * nseq);
-  FSet(ainfo->wgt, nseq, 1.0);
-
-  /* Per-sequence info records start with no flags set. */
-  ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq);
-  i = 0;
-  while (i < nseq)
-    {
-      ainfo->sqinfo[i].flags = 0;
-      i++;
-    }
-
-  *ret_aseq = rows;
-}
-
-
-/* Function: InitAinfo()
- * Date: SRE, Tue Jan 19 10:16:02 1999 [St. Louis]
- *
- * Purpose: Initialize the fields in ainfo structure to
- * default (null) values. Does nothing with
- * fields that are dependent on nseq or alen.
- *
- * Args: ainfo - optional info structure for an alignment
- *
- * Returns: (void). ainfo is modified.
- */
-void
-InitAinfo(AINFO *ainfo)
-{
-  /* Optional string annotations: none present yet. */
-  ainfo->name = NULL;
-  ainfo->desc = NULL;
-  ainfo->acc = NULL;
-  ainfo->au = NULL;
-  ainfo->cs = NULL;
-  ainfo->rf = NULL;
-
-  ainfo->flags = 0;
-
-  /* Cutoff pairs (tc, nc, ga) all start at zero. */
-  ainfo->tc2 = 0.0;
-  ainfo->tc1 = ainfo->tc2;
-  ainfo->nc2 = 0.0;
-  ainfo->nc1 = ainfo->nc2;
-  ainfo->ga2 = 0.0;
-  ainfo->ga1 = ainfo->ga2;
-}
-
-
-/* Function: FreeAlignment()
- *
- * Purpose: Free the space allocated to alignment, names, and optional
- * information.
- *
- * Args: aseqs - sequence alignment
- * ainfo - associated alignment data.
- */
-void
-FreeAlignment(char **aseqs, AINFO *ainfo)
-{
-  int     seqidx;
-  SQINFO *sq;
-
-  /* Per-sequence strings are owned only when their flag bit is set. */
-  for (seqidx = 0; seqidx < ainfo->nseq; seqidx++)
-    {
-      sq = &(ainfo->sqinfo[seqidx]);
-      if (sq->flags & SQINFO_SS) free(sq->ss);
-      if (sq->flags & SQINFO_SA) free(sq->sa);
-    }
-
-  /* Optional alignment-wide annotations; free(NULL) is a no-op. */
-  free(ainfo->cs);
-  free(ainfo->rf);
-  free(ainfo->name);
-  free(ainfo->desc);
-  free(ainfo->acc);
-  free(ainfo->au);
-
-  free(ainfo->sqinfo);
-  free(ainfo->wgt);
-  Free2DArray((void **) aseqs, ainfo->nseq);
-}
-
-
-
-/* Function: SAMizeAlignment()
- * Date: SRE, Tue Jun 30 09:49:40 1998 [St. Louis]
- *
- * Purpose: Make a "best effort" attempt to convert an alignment
- * to SAM gap format: - in delete col, . in insert col.
- * Only works if alignment adheres to SAM's upper/lower
- * case convention, which is true for instance of old
- * HMMER alignments.
- *
- * Args: aseq - alignment to convert
- * nseq - number of seqs in alignment
- * alen - length of alignment
- *
- * Returns: (void)
- */
-void
-SAMizeAlignment(char **aseq, int nseq, int alen)
-{
-  int col; /* counter for aligned columns */
-  int i; /* counter for seqs */
-  int sawlower, sawupper, sawgap;
-  char gapchar;
-
-  for (col = 0; col < alen; col++)
-    {
-      sawlower = sawupper = sawgap = 0;
-      /* pass 1: do we see only upper or lower?
-       * <ctype.h> functions require a value representable as unsigned
-       * char, and plain char may be negative, hence the casts.
-       */
-      for (i = 0; i < nseq; i++)
-        {
-          if (isgap(aseq[i][col])) { sawgap = 1; continue; }
-          if (isupper((unsigned char) aseq[i][col])) { sawupper = 1; continue; }
-          if (islower((unsigned char) aseq[i][col])) sawlower = 1;
-        }
-      /* select gap character for column: '.' only when the column has
-       * lower case symbols and no upper case ones (an insert column)
-       */
-      gapchar = '-'; /* default */
-      if (sawlower && ! sawupper) gapchar = '.';
-
-      /* pass 2: set gap char */
-      for (i = 0; i < nseq; i++)
-        if (isgap(aseq[i][col])) aseq[i][col] = gapchar;
-    }
-}
-
-
-/* Function: SAMizeAlignmentByGapFrac()
- * Date: SRE, Tue Jun 30 10:58:38 1998 [St. Louis]
- *
- * Purpose: Convert an alignment to SAM's gap and case
- * conventions, using gap fraction in a column
- * to choose match versus insert columns. In match columns,
- * residues are upper case and gaps are '-'.
- * In insert columns, residues are lower case and
- * gaps are '.'
- *
- * Args: aseq - aligned sequences
- * nseq - number of sequences
- * alen - length of alignment
- * maxgap - if more gaps than this fraction, column is insert.
- *
- * Returns: (void) Characters in aseq may be altered.
- */
-void
-SAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap)
-{
-  int apos; /* counter over columns */
-  int idx; /* counter over sequences */
-  int ngap; /* number of gaps seen */
-
-  for (apos = 0; apos < alen; apos++)
-    {
-      /* count gaps */
-      ngap = 0;
-      for (idx = 0; idx < nseq; idx++)
-        if (isgap(aseq[idx][apos])) ngap++;
-
-      /* convert to SAM conventions.
-       * tolower()/toupper() require a value representable as unsigned
-       * char, and plain char may be negative, hence the casts.
-       */
-      if ((float) ngap / (float) nseq > maxgap)
-        { /* insert column */
-          for (idx = 0; idx < nseq; idx++)
-            if (isgap(aseq[idx][apos])) aseq[idx][apos] = '.';
-            else aseq[idx][apos] = (char) tolower((unsigned char) aseq[idx][apos]);
-        }
-      else
-        { /* match column */
-          for (idx = 0; idx < nseq; idx++)
-            if (isgap(aseq[idx][apos])) aseq[idx][apos] = '-';
-            else aseq[idx][apos] = (char) toupper((unsigned char) aseq[idx][apos]);
-        }
-    }
-}
-
-
-
-
-/* Function: MakeAlignedString()
- *
- * Purpose: Given a raw string of some type (secondary structure, say),
- * align it to a given aseq by putting gaps wherever the
- * aseq has gaps.
- *
- * Args: aseq: template for alignment
- * alen: length of aseq
- * ss: raw string to align to aseq
- * ret_s: RETURN: aligned ss
- *
- * Return: 1 on success, 0 on failure (and squid_errno is set.)
- * ret_s is malloc'ed here and must be free'd by caller.
- */
-int
-MakeAlignedString(char *aseq, int alen, char *ss, char **ret_s)
-{
-  char *new;
-  int apos, rpos;
-  size_t sslen = strlen(ss);    /* raw length; must equal the number of non-gap columns in aseq */
-
-  new = (char *) MallocOrDie ((alen+1) * sizeof(char));
-  for (apos = rpos = 0; apos < alen; apos++)
-    if (! isgap(aseq[apos]))
-      {
-        /* ss has fewer characters than aseq has symbols: fail here
-         * rather than read past the end of ss.
-         */
-        if ((size_t) rpos >= sslen)
-          { squid_errno = SQERR_PARAMETER; free(new); return 0; }
-        new[apos] = ss[rpos];
-        rpos++;
-      }
-    else
-      new[apos] = '.';          /* gap in the template becomes '.' */
-  new[apos] = '\0';
-
-  /* Leftover characters in ss mean it did not match aseq either. */
-  if ((size_t) rpos != sslen)
-    { squid_errno = SQERR_PARAMETER; free(new); return 0; }
-  *ret_s = new;
-  return 1;
-}
-
-
-/* Function: MakeDealignedString()
- *
- * Purpose: Given an aligned string of some type (either sequence or
- * secondary structure, for instance), dealign it relative
- * to a given aseq. Return a ptr to the new string.
- *
- * Args: aseq : template alignment
- * alen : length of aseq
- * ss: : string to make dealigned copy of; same length as aseq
- * ret_s : RETURN: dealigned copy of ss
- *
- * Return: 1 on success, 0 on failure (and squid_errno is set)
- * ret_s is alloc'ed here and must be freed by caller
- */
-int
-MakeDealignedString(char *aseq, int alen, char *ss, char **ret_s)
-{
-  char *new;
-  int apos, rpos;
-
-  /* ss is indexed by aligned column below, so it must be exactly alen
-   * long. Check before touching it, so a short ss is never read past
-   * its terminator.
-   */
-  if ((size_t) alen != strlen(ss))
-    { squid_errno = SQERR_PARAMETER; return 0; }
-
-  /* The dealigned copy cannot be longer than the aligned string. */
-  new = (char *) MallocOrDie ((alen+1) * sizeof(char));
-  for (apos = rpos = 0; apos < alen; apos++)
-    if (! isgap(aseq[apos]))
-      {
-        new[rpos] = ss[apos];   /* keep only columns where aseq has a symbol */
-        rpos++;
-      }
-  new[rpos] = '\0';
-  *ret_s = new;
-  return 1;
-}
-
-
-/* Function: DealignedLength()
- *
- * Purpose: Count the number of non-gap symbols in seq.
- * (i.e. find the length of the unaligned sequence)
- *
- * Args: aseq - aligned sequence to count symbols in, \0 terminated
- *
- * Return: raw length of seq.
- */
-int
-DealignedLength(char *aseq)
-{
-  int nres = 0;                 /* non-gap symbols seen so far */
-  int pos;
-
-  /* Scan to the terminator, counting every column that is not a gap. */
-  for (pos = 0; aseq[pos] != '\0'; pos++)
-    {
-      if (isgap(aseq[pos])) continue;
-      nres++;
-    }
-  return nres;
-}
-
-
-/* Function: WritePairwiseAlignment()
- *
- * Purpose: Write a nice formatted pairwise alignment out,
- * with a BLAST-style middle line showing identities
- * as themselves (single letter) and conservative
- * changes as '+'.
- *
- * Args: ofp - open fp to write to (stdout, perhaps)
- * aseq1, aseq2 - alignments to write (not necessarily
- * flushed right with gaps)
- * name1, name2 - names of sequences
- * spos1, spos2 - starting position in each (raw) sequence
- * pam - PAM matrix; positive values define
- * conservative changes
- * indent - how many extra spaces to print on left
- *
- * Return: 1 on success, 0 on failure
- */
-int
-WritePairwiseAlignment(FILE *ofp,
-                       char *aseq1, char *name1, int spos1,
-                       char *aseq2, char *name2, int spos2,
-                       int **pam, int indent)
-{
-  char sname1[11]; /* shortened name */
-  char sname2[11];
-  int still_going; /* True if writing another block */
-  char buf1[61]; /* buffer for writing seq1; CPL+1*/
-  char bufmid[61]; /* buffer for writing consensus */
-  char buf2[61];
-  char *s1, *s2; /* ptrs into each sequence */
-  int count1, count2; /* number of symbols we're writing */
-  int rpos1, rpos2; /* position in raw seqs */
-  int rawcount1, rawcount2; /* number of nongap symbols written */
-  int apos;
-
-  /* Names are cut to 10 characters and then at the first whitespace
-   * that follows the first token.
-   */
-  strncpy(sname1, name1, 10);
-  sname1[10] = '\0';
-  strtok(sname1, WHITESPACE);
-
-  strncpy(sname2, name2, 10);
-  sname2[10] = '\0';
-  strtok(sname2, WHITESPACE);
-
-  s1 = aseq1;
-  s2 = aseq2;
-  rpos1 = spos1;
-  rpos2 = spos2;
-
-  still_going = TRUE;
-  while (still_going)
-    {
-      still_going = FALSE;
-
-      /* get next line's worth from both */
-      strncpy(buf1, s1, 60); buf1[60] = '\0';
-      strncpy(buf2, s2, 60); buf2[60] = '\0';
-      count1 = strlen(buf1);
-      count2 = strlen(buf2);
-
-      /* is there still more to go? */
-      if ((count1 == 60 && s1[60] != '\0') ||
-          (count2 == 60 && s2[60] != '\0'))
-        still_going = TRUE;
-
-      /* shift seq ptrs by a line */
-      s1 += count1;
-      s2 += count2;
-
-      /* assemble the consensus line */
-      for (apos = 0; apos < count1 && apos < count2; apos++)
-        {
-          if (!isgap(buf1[apos]) && !isgap(buf2[apos]))
-            {
-              if (buf1[apos] == buf2[apos])
-                bufmid[apos] = buf1[apos];
-              /* pam is indexed by (symbol - 'A'); only look it up for
-               * upper case letters, so lower case or non-alphabetic
-               * symbols cannot index outside the matrix.
-               */
-              else if (buf1[apos] >= 'A' && buf1[apos] <= 'Z' &&
-                       buf2[apos] >= 'A' && buf2[apos] <= 'Z' &&
-                       pam[buf1[apos] - 'A'][buf2[apos] - 'A'] > 0)
-                bufmid[apos] = '+';
-              else
-                bufmid[apos] = ' ';
-            }
-          else
-            bufmid[apos] = ' ';
-        }
-      bufmid[apos] = '\0';
-
-      /* count the raw (non-gap) symbols on this line of each sequence,
-       * to print end coordinates and advance the start coordinates
-       */
-      rawcount1 = 0;
-      for (apos = 0; apos < count1; apos++)
-        if (!isgap(buf1[apos])) rawcount1++;
-
-      rawcount2 = 0;
-      for (apos = 0; apos < count2; apos++)
-        if (!isgap(buf2[apos])) rawcount2++;
-
-      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "",
-                     sname1, rpos1, buf1, rpos1 + rawcount1 -1);
-      (void) fprintf(ofp, "%*s %s\n", indent, "",
-                     bufmid);
-      (void) fprintf(ofp, "%*s%-10.10s %5d %s %5d\n", indent, "",
-                     sname2, rpos2, buf2, rpos2 + rawcount2 -1);
-      (void) fprintf(ofp, "\n");
-
-      rpos1 += rawcount1;
-      rpos2 += rawcount2;
-    }
-
-  return 1;
-}
-
-
-/* Function: MingapAlignment()
- *
- * Purpose: Remove all-gap columns from a multiple sequence alignment
- * and its associated data. The alignment is assumed to be
- * flushed (all aseqs the same length).
- */
-int
-MingapAlignment(char **aseqs, AINFO *ainfo)
-{
-  int src;                      /* column being read in the original alignment */
-  int dst = 0;                  /* next column to write in the compacted alignment */
-  int seqidx;
-  int allgap;                   /* 1 if column src holds only gaps */
-
-  /* Compact in place: dst never runs ahead of src, so columns are only
-   * ever moved leftwards within the existing storage.
-   */
-  for (src = 0; aseqs[0][src] != '\0'; src++)
-    {
-      allgap = 1;
-      for (seqidx = 0; seqidx < ainfo->nseq; seqidx++)
-        if (! isgap(aseqs[seqidx][src])) { allgap = 0; break; }
-      if (allgap) continue;     /* drop this column */
-
-      /* Keep the column; move it (and its cs/rf annotation) if needed. */
-      if (dst != src)
-        {
-          for (seqidx = 0; seqidx < ainfo->nseq; seqidx++)
-            aseqs[seqidx][dst] = aseqs[seqidx][src];
-
-          if (ainfo->cs != NULL) ainfo->cs[dst] = ainfo->cs[src];
-          if (ainfo->rf != NULL) ainfo->rf[dst] = ainfo->rf[src];
-        }
-      dst++;
-    }
-
-  /* Terminate every string at the new length and record it. */
-  for (seqidx = 0; seqidx < ainfo->nseq; seqidx++)
-    aseqs[seqidx][dst] = '\0';
-  ainfo->alen = dst;
-  if (ainfo->cs != NULL) ainfo->cs[dst] = '\0';
-  if (ainfo->rf != NULL) ainfo->rf[dst] = '\0';
-  return 1;
-}
-
-
-
-/* Function: RandomAlignment()
- *
- * Purpose: Create a random alignment from raw sequences.
- *
- * Ideally, we would like to sample an alignment from the
- * space of possible alignments according to its probability,
- * given a prior probability distribution for alignments.
- * I don't see how to describe such a distribution, let alone
- * sample it.
- *
- * This is a rough approximation that tries to capture some
- * desired properties. We assume the alignment is generated
- * by a simple HMM composed of match and insert states.
- * Given parameters (pop, pex) for the probability of opening
- * and extending an insertion, we can find the expected number
- * of match states, M, in the underlying model for each sequence.
- * We use an average M taken over all the sequences (this is
- * an approximation. The expectation of M given all the sequence
- * lengths is a nasty-looking summation.)
- *
- * M = len / ( 1 + pop ( 1 + 1/ (1-pex) ) )
- *
- * Then, we assign positions in each raw sequence onto the M match
- * states and M+1 insert states of this "HMM", by rolling random
- * numbers and inserting the (rlen-M) inserted positions randomly
- * into the insert slots, taking into account the relative probability
- * of open vs. extend.
- *
- * The resulting alignment has two desired properties: insertions
- * tend to follow the HMM-like exponential distribution, and
- * the "sparseness" of the alignment is controllable through
- * pop and pex.
- *
- * Args: rseqs - raw sequences to "align", 0..nseq-1
- * sqinfo - array of 0..nseq-1 info structures for the sequences
- * nseq - number of sequences
- * pop - probability to open insertion (0<pop<1)
- * pex - probability to extend insertion (0<pex<1)
- * ret_aseqs - RETURN: alignment (flushed)
- * ainfo - fill in: alignment info
- *
- * Return: 1 on success, 0 on failure. Sets squid_errno to indicate cause
- * of failure.
- */
-int
-RandomAlignment(char **rseqs, SQINFO *sqinfo, int nseq, float pop, float pex,
- char ***ret_aseqs, AINFO *ainfo)
-{
- char **aseqs; /* RETURN: alignment */
- int alen; /* length of alignment */
- int *rlen; /* lengths of each raw sequence */
- int M; /* length of "model" */
- int **ins; /* insertion counts, 0..nseq-1 by 0..M */
- int *master_ins; /* max insertion counts, 0..M */
- int apos, rpos, idx;
- int statepos;
- int count;
- int minlen;
-
- /* calculate expected length of model, M
- */
- rlen = (int *) MallocOrDie (sizeof(int) * nseq);
- M = 0;
- minlen = 9999999;
- for (idx = 0; idx < nseq; idx++)
- {
- rlen[idx] = strlen(rseqs[idx]);
- M += rlen[idx];
- minlen = (rlen[idx] < minlen) ? rlen[idx] : minlen;
- }
- M = (int) ((float) M / (1.0 + pop * (1.0 + 1.0 / (1.0 - pex))));
- M /= nseq;
- if (M > minlen) M = minlen;
-
- /* make arrays that count insertions in M+1 possible insert states
- */
- ins = (int **) MallocOrDie (sizeof(int *) * nseq);
- master_ins = (int *) MallocOrDie (sizeof(int) * (M+1));
- for (idx = 0; idx < nseq; idx++)
- {
- ins[idx] = (int *) MallocOrDie (sizeof(int) * (M+1));
- for (rpos = 0; rpos <= M; rpos++)
- ins[idx][rpos] = 0;
- }
- /* normalize */
- pop = pop / (pop+pex);
- pex = 1.0 - pop;
- /* make insertions for individual sequences */
- for (idx = 0; idx < nseq; idx++)
- {
- apos = -1;
- for (rpos = 0; rpos < rlen[idx]-M; rpos++)
- {
- if (sre_random() < pop || apos == -1) /* open insertion */
- apos = CHOOSE(M+1); /* choose 0..M */
- ins[idx][apos]++;
- }
- }
- /* calculate master_ins, max inserts */
- alen = M;
- for (apos = 0; apos <= M; apos++)
- {
- master_ins[apos] = 0;
- for (idx = 0; idx < nseq; idx++)
- if (ins[idx][apos] > master_ins[apos])
- master_ins[apos] = ins[idx][apos];
- alen += master_ins[apos];
- }
-
-
- /* Now, construct alignment
- */
- aseqs = (char **) MallocOrDie (sizeof (char *) * nseq);
- for (idx = 0; idx < nseq; idx++)
- aseqs[idx] = (char *) MallocOrDie (sizeof(char) * (alen+1));
- for (idx = 0; idx < nseq; idx++)
- {
- apos = rpos = 0;
-
- for (statepos = 0; statepos <= M; statepos++)
- {
- for (count = 0; count < ins[idx][statepos]; count++)
- aseqs[idx][apos++] = rseqs[idx][rpos++];
- for (; count < master_ins[statepos]; count++)
- aseqs[idx][apos++] = ' ';
-
- if (statepos != M)
- aseqs[idx][apos++] = rseqs[idx][rpos++];
- }
- aseqs[idx][alen] = '\0';
- }
- ainfo->flags = 0;
- ainfo->alen = alen;
- ainfo->nseq = nseq;
- ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO) * nseq);
- for (idx = 0; idx < nseq; idx++)
- SeqinfoCopy(&(ainfo->sqinfo[idx]), &(sqinfo[idx]));
-
- free(rlen);
- free(master_ins);
- Free2DArray((void **) ins, nseq);
- *ret_aseqs = aseqs;
- return 1;
-}
-
-/* Function: AlignmentHomogenousGapsym()
- * Date: SRE, Sun Mar 19 19:37:12 2000 [wren, St. Louis]
- *
- * Purpose: Sometimes we've got to convert alignments to
- * a lowest common denominator, and we need
- * a single specific gap character -- for example,
- * PSI-BLAST blastpgp -B takes a very simplistic
- * alignment input format which appears to only
- * allow '-' as a gap symbol.
- *
- * Anything matching the isgap() macro is
- * converted.
- *
- * Args: aseq - aligned character strings, [0..nseq-1][0..alen-1]
- * nseq - number of aligned strings
- * alen - length of alignment
- * gapsym - character to use for gaps.
- *
- * Returns: void ("never fails")
- */
-void
-AlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym)
-{
- int i, apos;
-
- for (i = 0; i < nseq; i++)
- for (apos = 0; apos < alen; apos++)
- if (isgap(aseq[i][apos])) aseq[i][apos] = gapsym;
-}
diff --git a/squid/alistat_main.c b/squid/alistat_main.c
deleted file mode 100644
index 349f0d8..0000000
--- a/squid/alistat_main.c
+++ /dev/null
@@ -1,275 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* alistat_main.c
- * Fri Jan 27 10:41:41 1995
- * CVS $Id: alistat_main.c,v 1.8 2003/04/14 16:00:16 eddy Exp $
- *
- * Look at an alignment file, determine some simple statistics.
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "alistat - show some simple statistics on an alignment file";
-
-static char usage[] = "\
-Usage: alistat [-options] <alignment file>\n\
- Available options:\n\
- -a : report per-sequence info, not just a summary\n\
- -f : fast: estimate average %id by sampling (not compatible with -a)\n\
- -h : help: display usage and version\n\
- -q : quiet: suppress verbose header\n\
-";
-
-static char experts[] = "\
- Expert options:\n\
- --consensus <f>: write majority rule consensus sequence(s) in FASTA\n\
- format to file <f>\n\
- --identmx <f> : save a report on all NxN pairwise identities to file <f>\n\
- --informat <s> : specify alignment file format <s>\n\
- allowed formats: SELEX, MSF, Clustal, a2m, PHYLIP\n\
-";
-
-struct opt_s OPTIONS[] = {
- { "-a", TRUE, sqdARG_NONE },
- { "-f", TRUE, sqdARG_NONE },
- { "-h", TRUE, sqdARG_NONE },
- { "-q", TRUE, sqdARG_NONE },
- { "--consensus", FALSE, sqdARG_STRING },
- { "--identmx", FALSE, sqdARG_STRING },
- { "--informat", FALSE, sqdARG_STRING },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-int
-main(int argc, char **argv)
-{
- char *afile; /* name of aligned sequence file */
- MSAFILE *afp; /* pointer to open alignment file*/
- MSA *msa; /* multiple sequence alignment */
- int fmt; /* format of afile */
- int rlen; /* raw sequence length */
- int nres; /* number of residues */
- float **imx; /* identity matrix */
- int i,j;
- int small, large;
- int bestj, worstj;
- float sum, best, worst;
- float worst_worst, worst_best, best_best;
- float avgid;
- int nsample;
-
- int allreport;
- int do_fast;
- int be_quiet;
- char *consfile;
- FILE *consfp = NULL;
- char *identmx_report; /* file to save identity matrix info to */
- FILE *identmx_fp = NULL;
-
- char *optname;
- char *optarg;
- int optind;
-
- /* These inits are solely to silence gcc warnings about
- * uninitialized variables
- */
- worst_worst = worst_best = best_best = 0.0;
- bestj = worstj = -1;
-
- /***********************************************
- * Parse command line
- ***********************************************/
-
- fmt = MSAFILE_UNKNOWN; /* by default, we autodetect file format */
- allreport = FALSE;
- do_fast = FALSE;
- be_quiet = FALSE;
- consfile = NULL;
- identmx_report = NULL;
-
- while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
- &optind, &optname, &optarg))
- {
- if (strcmp(optname, "-a") == 0) { allreport = TRUE; }
- else if (strcmp(optname, "-f") == 0) { do_fast = TRUE; }
- else if (strcmp(optname, "-q") == 0) { be_quiet = TRUE; }
- else if (strcmp(optname, "--consensus") == 0) { consfile = optarg; }
- else if (strcmp(optname, "--identmx") == 0) { identmx_report = optarg; }
- else if (strcmp(optname, "--informat") == 0) {
- fmt = String2SeqfileFormat(optarg);
- if (fmt == MSAFILE_UNKNOWN)
- Die("unrecognized sequence file format \"%s\"", optarg);
- if (! IsAlignmentFormat(fmt))
- Die("%s is an unaligned format, can't read as an alignment", optarg);
- }
- else if (strcmp(optname, "-h") == 0) {
- SqdBanner(stdout, banner);
- puts(usage);
- puts(experts);
- exit(EXIT_SUCCESS);
- }
- }
-
- if (argc - optind != 1) Die("Incorrect number of arguments.\n%s\n", usage);
- afile = argv[optind];
-
- if (do_fast && allreport)
- Die("Verbose reports (-a, --identmx) are incompatible with fast sampling (-f)");
- if (do_fast && identmx_report != NULL)
- Die("Verbose reports (-a, --identmx) are incompatible with fast sampling (-f)");
-
- if (! be_quiet)
- SqdBanner(stdout, banner);
-
- /***********************************************
- * Loop over every alignment in the file.
- ***********************************************/
-
- if ((afp = MSAFileOpen(afile, fmt, NULL)) == NULL)
- Die("Alignment file %s could not be opened for reading", afile);
-
- if (consfile != NULL && (consfp = fopen(consfile, "w")) == NULL)
- Die("Failed to open consensus sequence file %s for writing", consfile);
-
- if (identmx_report != NULL && (identmx_fp = fopen(identmx_report, "w")) == NULL)
- Die("Failed to open identity matrix report file %s for writing", identmx_report);
-
- while ((msa = MSAFileRead(afp)) != NULL)
- {
- for (i = 0; i < msa->nseq; i++) s2upper(msa->aseq[i]);
-
- /* Statistics we always collect:
- * unaligned sequence lengths; mean and range
- */
- nres = 0;
- small = large = -1;
- for (i = 0; i < msa->nseq; i++)
- {
- rlen = DealignedLength(msa->aseq[i]);
- nres += rlen;
- if (small == -1 || rlen < small) small = rlen;
- if (large == -1 || rlen > large) large = rlen;
- }
-
- /* Statistics we have to be careful about
- * collecting, because of time constraints on NxN operations
- */
- if (do_fast)
- {
- nsample = 1000;
- avgid = AlignmentIdentityBySampling(msa->aseq, msa->alen, msa->nseq,
- nsample);
- }
- else
- {
- /* In a full report, for each sequence, find the best relative,
- * and the worst relative. For overall statistics, save the
- * worst best (most distant single seq) and the best best
- * (most closely related pair) and the worst worst (most
- * distantly related pair) and yes, I know it's confusing.
- */
-
- MakeIdentityMx(msa->aseq, msa->nseq, &imx);
- if (allreport) {
- printf(" %-15s %5s %7s %-15s %7s %-15s\n",
- "NAME", "LEN", "HIGH ID", "(TO)", "LOW ID", "(TO)");
- printf(" --------------- ----- ------- --------------- ------- ---------------\n");
- }
-
- /* Print the identity matrix report: one line per pair of sequences.
- */
- if (identmx_report != NULL)
- {
- for (i = 0; i < msa->nseq; i++)
- for (j = i+1; j < msa->nseq; j++)
- fprintf(identmx_fp, "%-4d %-4d %-15s %-15s %.3f\n",
- i, j, msa->sqname[i], msa->sqname[j], imx[i][j]);
- }
-
- sum = 0.0;
- worst_best = 1.0;
- best_best = 0.0;
- worst_worst = 1.0;
- for (i = 0; i < msa->nseq; i++)
- {
- worst = 1.0;
- best = 0.0;
- for (j = 0; j < msa->nseq; j++)
- { /* closest seq to this one = best */
- if (i != j && imx[i][j] > best)
- { best = imx[i][j]; bestj = j; }
- if (imx[i][j] < worst)
- { worst = imx[i][j]; worstj = j; }
- }
-
- if (allreport)
- printf("* %-15s %5d %7.1f %-15s %7.1f %-15s\n",
- msa->sqname[i], DealignedLength(msa->aseq[i]),
- best * 100., msa->sqname[bestj],
- worst * 100., msa->sqname[worstj]);
-
- if (best > best_best) best_best = best;
- if (best < worst_best) worst_best = best;
- if (worst < worst_worst) worst_worst = worst;
- for (j = 0; j < i; j++)
- sum += imx[i][j];
-
- }
- avgid = sum / (float) (msa->nseq * (msa->nseq-1)/2.0);
- if (allreport) puts("");
- FMX2Free(imx);
- }
-
- /* Print output.
- * Some fields aren't available if -f (fast) was chosen.
- */
- if (msa->name != NULL)
- printf("Alignment name: %s\n", msa->name);
- printf("Format: %s\n", SeqfileFormat2String(afp->format));
- printf("Number of sequences: %d\n", msa->nseq);
- printf("Total # residues: %d\n", nres);
- printf("Smallest: %d\n", small);
- printf("Largest: %d\n", large);
- printf("Average length: %.1f\n", (float) nres / (float) msa->nseq);
- printf("Alignment length: %d\n", msa->alen);
- printf("Average identity: %.0f%%\n", 100.*avgid);
- if (! do_fast) {
- printf("Most related pair: %.0f%%\n", 100.*best_best);
- printf("Most unrelated pair: %.0f%%\n", 100.*worst_worst);
- printf("Most distant seq: %.0f%%\n", 100.*worst_best);
- }
-
- /* Save majority rule consensus sequence if we were asked
- */
- if (consfile != NULL) {
- char *cs;
- cs = MajorityRuleConsensus(msa->aseq, msa->nseq, msa->alen);
- WriteSimpleFASTA(consfp, cs,
- msa->name != NULL? msa->name : "consensus",
- msa->desc);
- free(cs);
- printf("Consensus: written to %s\n", consfile);
- }
-
- puts("//");
- MSAFree(msa);
- }
-
- MSAFileClose(afp);
- if (consfile != NULL) fclose(consfp);
- return 0;
-}
diff --git a/squid/clustal.c b/squid/clustal.c
deleted file mode 100644
index cd4c689..0000000
--- a/squid/clustal.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* clustal.c
- * SRE, Sun Jun 6 17:50:45 1999 [bus from Madison, 1999 worm mtg]
- *
- * Import/export of ClustalV/W multiple sequence alignment
- * formatted files. Derivative of msf.c; MSF is a pretty
- * generic interleaved format.
- *
- * CVS $Id: clustal.c,v 1.2 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-#include "msa.h"
-
-#ifdef TESTDRIVE_CLUSTAL
-/*****************************************************************
- * msf.c test driver:
- * cc -DTESTDRIVE_CLUSTAL -g -O2 -Wall -o test clustal.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm
- *
- */
-int
-main(int argc, char **argv)
-{
- MSAFILE *afp;
- MSA *msa;
- char *file;
-
- file = argv[1];
-
- if ((afp = MSAFileOpen(file, MSAFILE_CLUSTAL, NULL)) == NULL)
- Die("Couldn't open %s\n", file);
-
- while ((msa = ReadClustal(afp)) != NULL)
- {
- WriteClustal(stdout, msa);
- MSAFree(msa);
- }
-
- MSAFileClose(afp);
- exit(0);
-}
-/******************************************************************/
-#endif /* testdrive_clustal */
-
-
-/* Function: ReadClustal()
- * Date: SRE, Sun Jun 6 17:53:49 1999 [bus from Madison, 1999 worm mtg]
- *
- * Purpose: Parse an alignment read from an open Clustal format
- * alignment file. Clustal is a single-alignment format.
- * Return the alignment, or NULL if we have no data.
- *
- * Args: afp - open alignment file
- *
- * Returns: MSA * - an alignment object
- * caller responsible for an MSAFree()
- * NULL if no more alignments
- *
- * Diagnostics:
- * Will Die() here with a (potentially) useful message
- * if a parsing error occurs.
- */
-MSA *
-ReadClustal(MSAFILE *afp)
-{
- MSA *msa;
- char *s;
- int slen;
- int sqidx;
- char *name;
- char *seq;
- char *s2;
-
- if (feof(afp->f)) return NULL;
-
- /* Skip until we see the CLUSTAL header
- */
- while ((s = MSAFileGetLine(afp)) != NULL)
- {
- if (strncmp(s, "CLUSTAL", 7) == 0 &&
- strstr(s, "multiple sequence alignment") != NULL)
- break;
- }
- if (s == NULL) return NULL;
-
- msa = MSAAlloc(10, 0);
-
- /* Now we're in the sequence section.
- * As discussed above, if we haven't seen a sequence name, then we
- * don't include the sequence in the alignment.
- * Watch out for conservation markup lines that contain *.: chars
- */
- while ((s = MSAFileGetLine(afp)) != NULL)
- {
- if ((name = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue;
- if ((seq = sre_strtok(&s, WHITESPACE, &slen)) == NULL) continue;
- s2 = sre_strtok(&s, "\n", NULL);
-
- /* The test for a conservation markup line
- */
- if (strpbrk(name, ".*:") != NULL && strpbrk(seq, ".*:") != NULL)
- continue;
- if (s2 != NULL)
- Die("Parse failed at line %d, file %s: possibly using spaces as gaps",
- afp->linenumber, afp->fname);
-
- /* It's not blank, and it's not a coord line: must be sequence
- */
- sqidx = MSAGetSeqidx(msa, name, msa->lastidx+1);
- msa->lastidx = sqidx;
- msa->sqlen[sqidx] = sre_strcat(&(msa->aseq[sqidx]), msa->sqlen[sqidx], seq, slen);
- }
-
- MSAVerifyParse(msa); /* verifies, and also sets alen and wgt. */
- return msa;
-}
-
-
-/* Function: WriteClustal()
- * Date: SRE, Sun Jun 6 18:12:47 1999 [bus from Madison, worm mtg 1999]
- *
- * Purpose: Write an alignment in Clustal format to an open file.
- *
- * Args: fp - file that's open for writing.
- * msa - alignment to write.
- *
- * Returns: (void)
- */
-void
-WriteClustal(FILE *fp, MSA *msa)
-{
- int idx; /* counter for sequences */
- int len; /* tmp variable for name lengths */
- int namelen; /* maximum name length used */
- int pos; /* position counter */
- char buf[64]; /* buffer for writing seq */
- int cpl = 50; /* char per line (< 64) */
-
- /* calculate max namelen used */
- namelen = 0;
- for (idx = 0; idx < msa->nseq; idx++)
- if ((len = strlen(msa->sqname[idx])) > namelen)
- namelen = len;
-
- fprintf(fp, "CLUSTAL W(1.5) multiple sequence alignment\n");
-
- /*****************************************************
- * Write the sequences
- *****************************************************/
-
- for (pos = 0; pos < msa->alen; pos += cpl)
- {
- fprintf(fp, "\n"); /* Blank line between sequence blocks */
- for (idx = 0; idx < msa->nseq; idx++)
- {
- strncpy(buf, msa->aseq[idx] + pos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "%*s %s\n", namelen, msa->sqname[idx], buf);
- }
- }
-
- return;
-}
-
-
-
diff --git a/squid/cluster.c b/squid/cluster.c
deleted file mode 100644
index 125e784..0000000
--- a/squid/cluster.c
+++ /dev/null
@@ -1,542 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* cluster.c
- * SRE, Sun Jul 18 09:49:47 1993
- * moved to squid Thu Mar 3 08:42:57 1994
- * CVS $Id: cluster.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- *
- * almost identical to bord.c, from fd
- * also now contains routines for constructing difference matrices
- * from alignments
- *
- * "branch ordering": Input a symmetric or upper-right-diagonal
- * NxN difference matrix (usually constructed by pairwise alignment
- * and similarity calculations for N sequences). Use the simple
- * cluster analysis part of the Fitch/Margoliash tree-building algorithm
- * (as described by Fitch and Margoliash 1967 as well as Feng
- * and Doolittle 1987) to calculate the topology of an "evolutionary
- * tree" consistent with the difference matrix. Returns an array
- * which represents the tree.
- *
- * The input difference matrix is just an NxN matrix of floats.
- * A good match is a small difference score (the algorithm is going
- * to search for minima among the difference scores). The original difference
- * matrix remains unchanged by the calculations.
- *
- * The output requires some explanation. A phylogenetic
- * tree is a binary tree, with N "leaves" and N-1 "nodes". The
- * topology of the tree may be completely described by N-1 structures
- * containing two pointers; each pointer points to either a leaf
- * or another node. Here, this is implemented with integer indices
- * rather than pointers. An array of N-1 pairs of ints is returned.
- * If the index is in the range (0..N-1), it is a "leaf" -- the
- * number of one of the sequences. If the index is in the range
- * (N..2N-2), it is another "node" -- (index-N) is the index
- * of the node in the returned array.
- *
- * If both indices of a member of the returned array point to
- * nodes, the tree is "compound": composed of more than one
- * cluster of related sequences.
- *
- * The higher-numbered elements of the returned array were the
- * first constructed, and hence represent the distal tips
- * of the tree -- the most similar sequences. The root
- * is node 0.
- ******************************************************************
- *
- * Algorithm
- *
- * INITIALIZATIONS:
- * - copy the difference matrix (otherwise the caller's copy would
- * get destroyed by the operations of this algorithm). If
- * it's asymmetric, make it symmetric.
- * - make a (0..N-1) array of ints to keep track of the indices in
- * the difference matrix as they get swapped around. Initialize
- * this matrix to 0..N-1.
- * - make a (0..N-2) array of int[2] to store the results (the tree
- * topology). Doesn't need to be initialized.
- * - keep track of a "N'", the current size of the difference
- * matrix being operated on.
- *
- * PROCESSING THE DIFFERENCE MATRIX:
- * - for N' = N down to N' = 2 (N-1 steps):
- * - in the half-diagonal N'xN' matrix, find the indices i,j at which
- * there's the minimum difference score
- *
- * Store the results:
- * - at position N'-2 of the result array, store coords[i] and
- * coords[j].
- *
- * Move i,j rows, cols to the outside edges of the matrix:
- * - swap row i and row N'-2
- * - swap row j and row N'-1
- * - swap column i and column N'-2
- * - swap column j and column N'-1
- * - swap indices i, N'-2 in the index array
- * - swap indices j, N'-1 in the index array
- *
- * Build a average difference score for differences to i,j:
- * - for all columns, find avg difference between rows i and j and store in row i:
- * row[i][col] = (row[i][col] + row[j][col]) / 2.0
- * - copy the contents of row i to column i (it's a symmetric
- * matrix, no need to recalculate)
- * - store an index N'+N-2 at position N'-2 of the index array: means
- * that this row/column is now a node rather than a leaf, and
- * contains minimum values
- *
- * Continue:
- * - go to the next N'
- *
- * GARBAGE COLLECTION & RETURN.
- *
- **********************************************************************
- *
- * References:
- *
- * Feng D-F and R.F. Doolittle. "Progressive sequence alignment as a
- * prerequisite to correct phylogenetic trees." J. Mol. Evol.
- * 25:351-360, 1987.
- *
- * Fitch W.M. and Margoliash E. "Construction of phylogenetic trees."
- * Science 155:279-284, 1967.
- *
- **********************************************************************
- *
- * SRE, 18 March 1992 (bord.c)
- * SRE, Sun Jul 18 09:52:14 1993 (cluster.c)
- * added to squid Thu Mar 3 09:13:56 1994
- **********************************************************************
- * Mon May 4 09:47:02 1992: keep track of difference scores at each node
- */
-
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-#include "squid.h"
-#include "sqfuncs.h"
-
-/* Function: Cluster()
- *
- * Purpose: Cluster analysis on a distance matrix. Constructs a
- * phylogenetic tree which contains the topology
- * and info for each node: branch lengths, how many
- * sequences are included under the node, and which
- * sequences are included under the node.
- *
- * Args: dmx - the NxN distance matrix ( >= 0.0, larger means more diverged)
- * N - size of mx (number of sequences)
- * mode - CLUSTER_MEAN, CLUSTER_MAX, or CLUSTER_MIN
- * ret_tree- RETURN: the tree
- *
- * Return: 1 on success, 0 on failure.
- * The caller is responsible for freeing the tree's memory,
- * by calling FreePhylo(tree, N).
- */
-int
-Cluster(float **dmx, int N, enum clust_strategy mode, struct phylo_s **ret_tree)
-{
- struct phylo_s *tree; /* (0..N-2) phylogenetic tree */
- float **mx; /* copy of difference matrix */
- int *coord; /* (0..N-1), indices for matrix coords */
- int i, j; /* coords of minimum difference */
- int idx; /* counter over seqs */
- int Np; /* N', a working copy of N */
- int row, col; /* loop variables */
- float min; /* best minimum score found */
- float *trow; /* tmp pointer for swapping rows */
- float tcol; /* tmp storage for swapping cols */
- float *diff; /* (0..N-2) difference scores at nodes */
- int swapfoo; /* for SWAP() macro */
-
- /**************************
- * Initializations.
- **************************/
- /* We destroy the matrix we work on, so make a copy of dmx.
- */
- mx = MallocOrDie (sizeof(float *) * N);
- for (i = 0; i < N; i++)
- {
- mx[i] = MallocOrDie (sizeof(float) * N);
- for (j = 0; j < N; j++)
- mx[i][j] = dmx[i][j];
- }
- /* coord array alloc, (0..N-1) */
- coord = MallocOrDie (N * sizeof(int));
- diff = MallocOrDie ((N-1) * sizeof(float));
- /* init the coord array to 0..N-1 */
- for (col = 0; col < N; col++) coord[col] = col;
- for (i = 0; i < N-1; i++) diff[i] = 0.0;
-
- /* tree array alloc, (0..N-2) */
- if ((tree = AllocPhylo(N)) == NULL) Die("AllocPhylo() failed");
-
- /*********************************
- * Process the difference matrix
- *********************************/
-
- /* N-prime, for an NxN down to a 2x2 diffmx */
- j= 0; /* just to silence gcc uninit warnings */
- for (Np = N; Np >= 2; Np--)
- {
- /* find a minimum on the N'xN' matrix*/
- min = 999999.;
- for (row = 0; row < Np; row++)
- for (col = row+1; col < Np; col++)
- if (mx[row][col] < min)
- {
- min = mx[row][col];
- i = row;
- j = col;
- }
-
- /* We're clustering row i with col j. write necessary
- * data into a node on the tree
- */
- /* topology info */
- tree[Np-2].left = coord[i];
- tree[Np-2].right = coord[j];
- if (coord[i] >= N) tree[coord[i]-N].parent = N + Np - 2;
- if (coord[j] >= N) tree[coord[j]-N].parent = N + Np - 2;
-
- /* keep score info */
- diff[Np-2] = tree[Np-2].diff = min;
-
- /* way-simple branch length estimation */
- tree[Np-2].lblen = tree[Np-2].rblen = min;
- if (coord[i] >= N) tree[Np-2].lblen -= diff[coord[i]-N];
- if (coord[j] >= N) tree[Np-2].rblen -= diff[coord[j]-N];
-
- /* number seqs included at node */
- if (coord[i] < N)
- {
- tree[Np-2].incnum ++;
- tree[Np-2].is_in[coord[i]] = 1;
- }
- else
- {
- tree[Np-2].incnum += tree[coord[i]-N].incnum;
- for (idx = 0; idx < N; idx++)
- tree[Np-2].is_in[idx] |= tree[coord[i]-N].is_in[idx];
- }
-
- if (coord[j] < N)
- {
- tree[Np-2].incnum ++;
- tree[Np-2].is_in[coord[j]] = 1;
- }
- else
- {
- tree[Np-2].incnum += tree[coord[j]-N].incnum;
- for (idx = 0; idx < N; idx++)
- tree[Np-2].is_in[idx] |= tree[coord[j]-N].is_in[idx];
- }
-
-
- /* Now build a new matrix, by merging row i with row j and
- * column i with column j; see Fitch and Margoliash
- */
- /* Row and column swapping. */
- /* watch out for swapping i, j away: */
- if (i == Np-1 || j == Np-2)
- SWAP(i,j);
-
- if (i != Np-2)
- {
- /* swap row i, row N'-2 */
- trow = mx[Np-2]; mx[Np-2] = mx[i]; mx[i] = trow;
- /* swap col i, col N'-2 */
- for (row = 0; row < Np; row++)
- {
- tcol = mx[row][Np-2];
- mx[row][Np-2] = mx[row][i];
- mx[row][i] = tcol;
- }
- /* swap coord i, coord N'-2 */
- SWAP(coord[i], coord[Np-2]);
- }
-
- if (j != Np-1)
- {
- /* swap row j, row N'-1 */
- trow = mx[Np-1]; mx[Np-1] = mx[j]; mx[j] = trow;
- /* swap col j, col N'-1 */
- for (row = 0; row < Np; row++)
- {
- tcol = mx[row][Np-1];
- mx[row][Np-1] = mx[row][j];
- mx[row][j] = tcol;
- }
- /* swap coord j, coord N'-1 */
- SWAP(coord[j], coord[Np-1]);
- }
-
- /* average i and j together; they're now
- at Np-2 and Np-1 though */
- i = Np-2;
- j = Np-1;
- /* merge by saving avg of cols of row i and row j */
- for (col = 0; col < Np; col++)
- {
- switch (mode) {
- case CLUSTER_MEAN: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break;
- case CLUSTER_MIN: mx[i][col] = MIN(mx[i][col], mx[j][col]); break;
- case CLUSTER_MAX: mx[i][col] = MAX(mx[i][col], mx[j][col]); break;
- default: mx[i][col] =(mx[i][col]+ mx[j][col]) / 2.0; break;
- }
- }
- /* copy those rows to columns */
- for (col = 0; col < Np; col++)
- mx[col][i] = mx[i][col];
- /* store the node index in coords */
- coord[Np-2] = Np+N-2;
- }
-
- /**************************
- * Garbage collection and return
- **************************/
- Free2DArray((void **) mx, N);
- free(coord);
- free(diff);
- *ret_tree = tree;
- return 1;
-}
-
-/* Function: AllocPhylo()
- *
- * Purpose: Allocate space for a phylo_s array. N-1 structures
- * are allocated, one for each node; in each node, a 0..N
- * is_in flag array is also allocated and initialized to
- * all zeros.
- *
- * Args: N - size; number of sequences being clustered
- *
- * Return: pointer to the allocated array
- *
- */
-struct phylo_s *
-AllocPhylo(int N)
-{
- struct phylo_s *tree;
- int i;
-
- if ((tree = (struct phylo_s *) malloc ((N-1) * sizeof(struct phylo_s))) == NULL)
- return NULL;
-
- for (i = 0; i < N-1; i++)
- {
- tree[i].diff = 0.0;
- tree[i].lblen = tree[i].rblen = 0.0;
- tree[i].left = tree[i].right = tree[i].parent = -1;
- tree[i].incnum = 0;
- if ((tree[i].is_in = (char *) calloc (N, sizeof(char))) == NULL)
- return NULL;
- }
- return tree;
-}
-
-
-/* Function: FreePhylo()
- *
- * Purpose: Free a clustree array that was built to cluster N sequences.
- *
- * Args: tree - phylogenetic tree to free
- * N - size of clustree; number of sequences it clustered
- *
- * Return: (void)
- */
-void
-FreePhylo(struct phylo_s *tree, int N)
-{
- int idx;
-
- for (idx = 0; idx < N-1; idx++)
- free(tree[idx].is_in);
- free(tree);
-}
-
-
-/* Function: MakeDiffMx()
- *
- * Purpose:  Build the num x num matrix of pairwise fractional
- *           differences for a set of aligned sequences. Each entry
- *           is 1.0 minus PairwiseIdentity() of the two sequences
- *           (i.e. 1.0 is completely different, 0.0 is exactly
- *           identical).
- *
- * Args:     aseqs   - flushed, aligned sequences
- *           num     - number of aseqs
- *           ret_dmx - RETURN: difference matrix
- *
- * Return:   (void); the matrix is handed back through ret_dmx.
- *           Caller must free diff matrix with FMX2Free(dmx)
- */
-void
-MakeDiffMx(char **aseqs, int num, float ***ret_dmx)
-{
-  float **diffs;                /* matrix under construction       */
-  int     row, col;             /* sequence indices                */
-  float   d;                    /* difference for one pair         */
-
-  diffs = FMX2Alloc(num, num);
-
-  /* Score every unordered pair once (diagonal included) and mirror
-   * the value across the diagonal.
-   */
-  for (row = 0; row < num; row++)
-    {
-      for (col = row; col < num; col++)
-        {
-          d = 1.0 - PairwiseIdentity(aseqs[row], aseqs[col]);
-          diffs[col][row] = d;
-          diffs[row][col] = d;
-        }
-    }
-
-  *ret_dmx = diffs;
-}
-
-/* Function: MakeIdentityMx()
- *
- * Purpose:  Counterpart of MakeDiffMx() that stores identities rather
- *           than differences: fills a num x num matrix with the
- *           PairwiseIdentity() of every pair of aligned sequences
- *           (i.e. 1.0 is completely identical, 0.0 is completely
- *           different).
- *
- * Args:     aseqs   - flushed, aligned sequences
- *           num     - number of aseqs
- *           ret_imx - RETURN: identity matrix (caller must free)
- *
- * Return:   (void); the matrix is handed back through ret_imx.
- *           Caller must free imx using FMX2Free(imx)
- */
-void
-MakeIdentityMx(char **aseqs, int num, float ***ret_imx)
-{
-  float **ident = FMX2Alloc(num, num);  /* matrix under construction */
-  int     a;                            /* first sequence of a pair  */
-  int     b;                            /* second sequence, b >= a   */
-
-  /* Upper triangle plus diagonal; each value is copied to its
-   * mirror cell so the matrix comes out symmetric.
-   */
-  for (a = 0; a < num; a++)
-    for (b = a; b < num; b++)
-      {
-        ident[b][a] = PairwiseIdentity(aseqs[a], aseqs[b]);
-        ident[a][b] = ident[b][a];
-      }
-
-  *ret_imx = ident;
-}
-
-
-
-/* Function: PrintNewHampshireTree()
- *
- * Purpose: Print out a tree in the "New Hampshire" standard
- * format. See PHYLIP's draw.doc for a definition of
- * the New Hampshire format.
- *
- * Like a CFG, we generate the format string left to
- * right by a preorder tree traversal.
- *
- * The traversal is iterative: node codes are kept on an
- * integer stack instead of recursing.
- *
- * Args: fp - file to print to
- * ainfo- alignment info, including sequence names
- * tree - tree to print
- * N - number of leaves
- *
- * Return: (void)
- *
- * NOTE(review): the root is pushed unconditionally and read as
- * tree[0], so this presumably requires N >= 2 -- confirm callers.
- */
-void
-PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N)
-{
- struct intstack_s *stack;
- int code;
- float *blen;
- int docomma;
-
- /* blen is indexed by node code (0..2N-2) and holds the length of the
- * branch leading down to that node. An entry is written when the
- * node's parent is expanded, i.e. before the node itself is popped.
- * The root's own entry, blen[N], is never written and never read.
- */
- blen = (float *) MallocOrDie (sizeof(float) * (2*N-1));
- stack = InitIntStack();
- PushIntStack(stack, N); /* push root on stack */
- docomma = FALSE;
-
- /* node index code:
- * 0..N-1 = leaves; indexes of sequences.
- * N..2N-2 = interior nodes; node-N = index of node in tree structure.
- * code N is the root.
- * 2N..3N-2 = special flags for closing interior nodes; node-2N = index in tree
- */
- while (PopIntStack(stack, &code))
- {
- if (code < N) /* we're a leaf. */
- {
- /* 1) print name:branchlength */
- if (docomma) fputs(",", fp);
- fprintf(fp, "%s:%.5f", ainfo->sqinfo[code].name, blen[code]);
- docomma = TRUE;
- }
-
- else if (code < 2*N) /* we're an interior node */
- {
- /* 1) print a '(' */
- if (docomma) fputs(",\n", fp);
- fputs("(", fp);
- /* 2) push on stack: ), rchild, lchild */
- /* code+N is this node's closing flag; it is pushed first so that
- * it is popped only after both subtrees, and the left child is
- * pushed last so that it is popped (printed) first.
- */
- PushIntStack(stack, code+N);
- PushIntStack(stack, tree[code-N].right);
- PushIntStack(stack, tree[code-N].left);
- /* 3) record branch lengths */
- blen[tree[code-N].right] = tree[code-N].rblen;
- blen[tree[code-N].left] = tree[code-N].lblen;
- docomma = FALSE; /* nothing printed inside this '(' yet */
- }
-
- else /* we're closing an interior node */
- {
- /* print a ):branchlength */
- /* code 2N closes the root, which ends the tree with ");" and has
- * no branch length; any other closing flag maps back to its
- * interior node code (code-N) to look up the branch length.
- */
- if (code == 2*N) fprintf(fp, ");\n");
- else fprintf(fp, "):%.5f", blen[code-N]);
- docomma = TRUE;
- }
- }
-
- FreeIntStack(stack);
- free(blen);
- return;
-}
-
-
-/* Function: PrintPhylo()
- *
- * Purpose:  Debugging output of a phylogenetic tree structure: one
- *           stanza per interior node, tree[0..N-2].
- *
- *           Children are stored as node codes: a code below N is a
- *           leaf (the index of a sequence), a code of N or more is an
- *           interior node whose index in tree[] is code-N. The number
- *           printed for a child is the sequence index for a leaf and
- *           the tree[] index for an interior node; leaves are also
- *           labelled with their sequence name, interior nodes with
- *           "interior".
- *
- * Args:     fp    - file to print to
- *           ainfo - alignment info, including sequence names
- *           tree  - tree to print
- *           N     - number of sequences the tree was built for
- *
- * Return:   (void)
- */
-void
-PrintPhylo(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N)
-{
-  int idx;
-
-  for (idx = 0; idx < N-1; idx++)
-    {
-      fprintf(fp, "Interior node %d (code %d)\n", idx, idx+N);
-      fprintf(fp, "\tParent: %d (code %d)\n", tree[idx].parent-N, tree[idx].parent);
-      /* leaf: print the code as is; interior: convert code to tree[] index */
-      fprintf(fp, "\tLeft: %d (%s) %f\n",
-              tree[idx].left < N ? tree[idx].left : tree[idx].left-N,
-              tree[idx].left < N ? ainfo->sqinfo[tree[idx].left].name : "interior",
-              tree[idx].lblen);
-      fprintf(fp, "\tRight: %d (%s) %f\n",
-              tree[idx].right < N ? tree[idx].right : tree[idx].right-N,
-              tree[idx].right < N ? ainfo->sqinfo[tree[idx].right].name : "interior",
-              tree[idx].rblen);
-      fprintf(fp, "\tHeight: %f\n", tree[idx].diff);
-      fprintf(fp, "\tIncludes:%d seqs\n", tree[idx].incnum);
-    }
-}
-
-
-
diff --git a/squid/compalign_main.c b/squid/compalign_main.c
deleted file mode 100644
index 88647ba..0000000
--- a/squid/compalign_main.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* main for compalign
- *
- * Compalign -- a program to compare two sequence alignments
- * SRE, Tue Nov 3 07:38:03 1992
- * RCS $Id: compalign_main.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- *
- * incorporated into SQUID, Thu Jan 26 16:52:41 1995
- *
- * Usage: compalign <trusted-alignment> <test-alignment>
- *
- * Calculate the fractional "identity" between the trusted alignment
- * and the test alignment. The two files must contain exactly the same
- * sequences, in exactly the same order.
- *
- * The identity of the multiple sequence alignments is defined as
- * the averaged identity over all N(N-1)/2 pairwise alignments.
- *
- * The fractional identity of two sets of pairwise alignments
- * is in turn defined as follows (for aligned known sequences k1 and k2,
- * and aligned test sequences t1 and t2):
- *
- * matched columns / total columns,
- *
- * where total columns = the total number of columns in
- * which there is a valid (nongap) symbol in k1 or k2;
- *
- * matched columns = the number of columns in which one of the
- * following is true:
- *
- * k1 and k2 both have valid symbols at a given column; t1 and t2
- * have the same symbols aligned in a column of the t1/t2
- * alignment;
- *
- * k1 has a symbol aligned to a gap in k2; that symbol in t1
- * is also aligned to a gap;
- *
- * k2 has a symbol aligned to a gap in k1; that symbol in t2
- * is also aligned to a gap.
- *
- * Because scores for all possible pairs are calculated, the
- * algorithm is of order (N^2)L for N sequences of length L;
- * large sequence sets will take a while.
- *
- * Sean Eddy, Tue Nov 3 07:46:59 1992
- *
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "compalign - compare two multiple alignments";
-
-static char usage[] = "\
-Usage: compalign [-options] <trusted.ali> <test.ali>\n\
- Available options:\n\
- -c : only compare under marked #=CS consensus structure\n\
- -h : print short help and usage info\n\
-";
-
-static char experts[] = "\
- --informat <s> : specify that both alignments are in format <s> (MSF, for instance)\n\
- --quiet : suppress verbose header (used in regression testing)\n\
-";
-
-struct opt_s OPTIONS[] = {
- { "-c", TRUE, sqdARG_NONE },
- { "-h", TRUE, sqdARG_NONE },
- { "--informat", FALSE, sqdARG_STRING },
- { "--quiet", FALSE, sqdARG_NONE },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-
-/* Function: main() (compalign)
- *
- * Purpose: Command-line driver. Reads a trusted and a test alignment
- * file in lockstep and, for every pair of alignments, prints
- * the score returned by CompareMultAlignments(), or by
- * CompareRefMultAlignments() when -c is given.
- *
- * Args: argc, argv - command line; after the options exactly two
- * file names are required (trusted, then test)
- *
- * Return: 0 once the trusted file is exhausted. Every error checked
- * here is reported through Die() (presumably fatal -- it is
- * never followed by recovery code).
- */
-int
-main(int argc, char **argv)
-{
- char *kfile; /* name of file of trusted (known) alignment */
- char *tfile; /* name of file of test alignment */
- MSAFILE *kfp; /* open ptr into trusted (known) alignfile */
- MSAFILE *tfp; /* open ptr into test alignment file */
- int format; /* expected format of alignment files */
- MSA *kmsa; /* a trusted (known) alignment */
- MSA *tmsa; /* a test alignment */
- char **kraw; /* dealigned trusted seqs */
- char **traw; /* dealigned test sequences */
- int idx; /* counter for sequences */
- int apos; /* position in alignment */
- float score; /* RESULT: score for the comparison */
-
- int cs_only; /* TRUE to compare under #=CS annotation only */
- int *ref = NULL; /* init only to silence gcc warning */
- int be_quiet; /* TRUE to suppress verbose header */
-
- char *optname;
- char *optarg;
- int optind;
-
- /***********************************************
- * Parse command line
- ***********************************************/
-
- format = MSAFILE_UNKNOWN;
- cs_only = FALSE;
- be_quiet = FALSE;
-
- while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
- &optind, &optname, &optarg))
- {
- if (strcmp(optname, "-c") == 0) cs_only = TRUE;
- else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE;
- else if (strcmp(optname, "--informat") == 0) {
- format = String2SeqfileFormat(optarg);
- if (format == MSAFILE_UNKNOWN)
- Die("unrecognized sequence file format \"%s\"", optarg);
- if (! IsAlignmentFormat(format))
- Die("%s is an unaligned format, can't read as an alignment", optarg);
- }
- else if (strcmp(optname, "-h") == 0) {
- SqdBanner(stdout, banner);
- puts(usage);
- puts(experts);
- exit(EXIT_SUCCESS);
- }
- }
-
- /* exactly two positional arguments must remain: trusted, then test */
- if (argc - optind != 2)
- Die("Incorrect number of command line arguments.\n%s\n", usage);
-
- kfile = argv[optind++];
- tfile = argv[optind];
-
- if (! be_quiet) SqdBanner(stdout, banner);
-
- /***********************************************
- * Read in the alignments
- * Capable of handling full Stockholm: >1 alignment/file
- ***********************************************/
-
- /* the same format value (MSAFILE_UNKNOWN unless --informat was given)
- * is used to open both files
- */
- if ((kfp = MSAFileOpen(kfile, format, NULL)) == NULL)
- Die("Trusted alignment file %s could not be opened for reading", kfile);
- if ((tfp = MSAFileOpen(tfile, format, NULL)) == NULL)
- Die("Test alignment file %s could not be opened for reading", tfile);
-
- /* The loop is driven by the trusted file: each trusted alignment must
- * have a partner in the test file, but alignments left in the test
- * file after the trusted file runs out are never read.
- */
- while ((kmsa = MSAFileRead(kfp)) != NULL)
- {
- if ((tmsa = MSAFileRead(tfp)) == NULL)
- Die("Failed to get a test alignment to match with the trusted alignment");
-
- /* test that they're the same! */
- if (kmsa->nseq != tmsa->nseq)
- Die("files %s and %s do not contain same number of seqs!\n", kfile, tfile);
-
- /* s2upper() is applied to every aligned sequence of both alignments
- * before any comparison (presumably upper-casing in place, so that
- * the checks and the score ignore case -- confirm against s2upper)
- */
- for (idx = 0; idx < kmsa->nseq; idx++)
- {
- s2upper(kmsa->aseq[idx]);
- s2upper(tmsa->aseq[idx]);
- }
- /* another sanity check */
- for (idx = 0; idx < kmsa->nseq; idx++)
- if (strcmp(kmsa->sqname[idx], tmsa->sqname[idx]) != 0)
- Die("seqs in %s and %s don't seem to be in the same order\n (%s != %s)",
- kfile, tfile, kmsa->sqname[idx], tmsa->sqname[idx]);
-
- /* and *another* sanity check */
- /* the dealigned sequences must match pairwise; kraw/traw exist only
- * for this check and are freed immediately afterwards
- */
- DealignAseqs(kmsa->aseq, kmsa->nseq, &kraw);
- DealignAseqs(tmsa->aseq, tmsa->nseq, &traw);
- for (idx = 0; idx < kmsa->nseq; idx++)
- if (strcmp(kraw[idx], traw[idx]) != 0)
- Die("raw seqs in %s and %s are not the same (died at %s, number %d)\n",
- kfile, tfile, kmsa->sqname[idx], idx);
- Free2DArray((void **) kraw, kmsa->nseq);
- Free2DArray((void **) traw, tmsa->nseq);
-
- /* -c: build a per-column mask from the trusted alignment's consensus
- * structure line; ref[apos] is TRUE where that line is not a gap
- */
- if (cs_only)
- {
- if (kmsa->ss_cons == NULL)
- Die("Trusted alignment %s has no consensus structure annotation\n -- can't use -c!\n",
- kfile);
- ref = (int *) MallocOrDie (sizeof(int) * kmsa->alen);
- for (apos = 0; apos < kmsa->alen; apos++)
- ref[apos] = (isgap(kmsa->ss_cons[apos])) ? FALSE : TRUE;
- }
-
- /***********************************************
- * Compare the alignments, print results
- ***********************************************/
-
- if (cs_only)
- score = CompareRefMultAlignments(ref, kmsa->aseq, tmsa->aseq, kmsa->nseq);
- else
- score = CompareMultAlignments(kmsa->aseq, tmsa->aseq, kmsa->nseq);
-
- /* each report falls back to the file name when the alignment itself
- * carries no name, and is terminated by a "//" line
- */
- printf("Trusted alignment: %s\n", kmsa->name != NULL ? kmsa->name : kfile);
- printf("Test alignment: %s\n", tmsa->name != NULL ? tmsa->name : tfile);
- printf("Total sequences: %d\n", kmsa->nseq);
- printf("Alignment identity: %.4f\n", score);
- puts("//");
-
- /* ref is allocated afresh for every alignment, so release it here */
- if (cs_only) free(ref);
- MSAFree(kmsa);
- MSAFree(tmsa);
- }
-
- MSAFileClose(kfp);
- MSAFileClose(tfp);
- return 0;
-}
-
-
diff --git a/squid/compstruct_main.c b/squid/compstruct_main.c
deleted file mode 100644
index 5299cc6..0000000
--- a/squid/compstruct_main.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* compstruct_main.c
- * SRE, Tue Aug 30 10:35:31 1994
- *
- * Compare RNA secondary structures.
- * CVS $Id: compstruct_main.c,v 1.5 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-#include "msa.h"
-
-/* One-line program description, printed through SqdBanner(). */
-static char banner[] = "compstruct - compare test RNA secondary structure predictions to trusted set";
-
-char usage[] = "\
-Usage: compstruct [-options] <trusted file> <test file>\n\
- Both files must contain secondary structure markup (e.g. Stockholm, SQUID,\n\
- SELEX formats), and sequences must occur in the same order in the two files.\n\
-\n\
- Available options are:\n\
- -h : print short help and usage info\n\
-";
-
-static char experts[] = "\
- --informat <s> : specify that both alignments are in format <s> (SELEX, for instance)\n\
- --quiet : suppress verbose header (used in regression testing)\n\
-";
-
-struct opt_s OPTIONS[] = {
- { "-h", TRUE, sqdARG_NONE },
- { "--informat", FALSE, sqdARG_STRING },
- { "--quiet", FALSE, sqdARG_NONE },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-
-static int KHS2ct(char *ss, int **ret_ct);
-/* static void WriteCT(FILE *fp, char *seq, int *ct, int len); */
-
-/* Function: main() (compstruct)
- *
- * Purpose:  Command-line driver. Reads a trusted and a test structure
- *           file one sequence at a time, in lockstep, converts both
- *           secondary structure strings with KHS2ct() and reports,
- *           per sequence and overall, how many trusted base pairs
- *           were predicted (sensitivity), how many predicted pairs
- *           are correct (specificity), and the two-state
- *           paired/unpaired accuracy.
- *
- *           A sequence pair is skipped, with a bracketed note on
- *           stdout, when either side has no structure, when either
- *           structure string is rejected by KHS2ct(), or when either
- *           structure string is shorter than the trusted sequence.
- *           Skipped pairs do not contribute to the totals.
- *
- * Args:     argc, argv - command line; after the options exactly two
- *                        file names are required (trusted, then test)
- *
- * Return:   0 once either file runs out of sequences. Errors checked
- *           here are reported through Die().
- */
-int
-main(int argc, char **argv)
-{
-  char   *kfile, *tfile;        /* known, test structure file */
-  int     format;               /* expected format of kfile, tfile */
-  SQFILE *kfp, *tfp;            /* open kfile, tfile */
-  char   *kseq, *tseq;          /* known, test sequence */
-  SQINFO  kinfo, tinfo;         /* known, test info */
-  int    *kct, *tct;            /* known, test CT rep of structure */
-  int     pos;
-  int     nseq;
-
-  int correct;                  /* count of correct base pair predictions */
-  int missedpair;               /* count of false negatives */
-  int falsepair;                /* count of false positives */
-  int tot_trusted;              /* total base pairs in trusted structure */
-  int tot_predicted;            /* total base pairs in predicted structure*/
-  int tot_correct;              /* cumulative total correct pairs */
-
-  int dscorrect;                /* count of correct 2-state paired prediction */
-  int sscorrect;                /* count of correct 2-state unpaired prediction */
-  int tot_dscorrect;
-  int tot_sscorrect;
-  int tot_positions;
-
-  int quiet;                    /* TRUE to silence verbose banner */
-
-  char *optname;
-  char *optarg;
-  int   optind;
-
-  /***********************************************
-   * Parse command line
-   ***********************************************/
-
-  format = MSAFILE_UNKNOWN;
-  quiet  = FALSE;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-                &optind, &optname, &optarg))
-    {
-      if (strcmp(optname, "--quiet") == 0) quiet = TRUE;
-      else if (strcmp(optname, "--informat") == 0) {
-        format = String2SeqfileFormat(optarg);
-        if (format == MSAFILE_UNKNOWN)
-          Die("unrecognized sequence file format \"%s\"", optarg);
-        if (! IsAlignmentFormat(format))
-          Die("%s is an unaligned format, can't read as an alignment", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0) {
-        SqdBanner(stdout, banner);
-        puts(usage);
-        puts(experts);
-        exit(EXIT_SUCCESS);
-      }
-    }
-
-  if (argc - optind != 2)
-    Die("Incorrect number of command line arguments.\n%s\n", usage);
-
-  kfile = argv[optind++];
-  tfile = argv[optind];
-
-  if (! quiet) SqdBanner(stdout, banner);
-
-  /***********************************************
-   * Open the files
-   ***********************************************/
-
-  if ((kfp = SeqfileOpen(kfile, format, NULL)) == NULL)
-    Die("Failed to open trusted structure file %s for reading", kfile);
-  if ((tfp = SeqfileOpen(tfile, format, NULL)) == NULL)
-    Die("Failed to open test structure file %s for reading", tfile);
-
-  /***********************************************
-   * Do structure comparisons, one seq at a time
-   ***********************************************/
-
-  tot_trusted   = tot_predicted = tot_correct   = 0;
-  tot_dscorrect = tot_sscorrect = tot_positions = 0;
-  nseq = 0;
-  while (ReadSeq(kfp, kfp->format, &kseq, &kinfo) && ReadSeq(tfp, tfp->format, &tseq, &tinfo))
-    {
-      /* name/sequence disagreements are only warned about, not fatal */
-      if (!quiet && strcmp(tinfo.name, kinfo.name) != 0)
-        Warn("Trusted sequence %s, test sequence %s -- names not identical\n",
-             kinfo.name, tinfo.name);
-      if (!quiet && strcmp(kseq, tseq) != 0)
-        Warn("Trusted sequence %s, test sequence %s -- sequences not identical\n",
-             kinfo.name, tinfo.name);
-
-      printf("%s %s\n", kinfo.name, (kinfo.flags & SQINFO_DESC) ? kinfo.desc : "");
-
-      if (! (tinfo.flags & SQINFO_SS) && ! (kinfo.flags & SQINFO_SS))
-        printf("[no test or trusted structure]\n\n");
-      else if (! (tinfo.flags & SQINFO_SS))
-        printf("[no test structure]\n\n");
-      else if (! (kinfo.flags & SQINFO_SS))
-        printf("[no trusted structure]\n\n");
-      else
-        {
-          /* KHS2ct() hands back its allocation even when it reports a
-           * bad structure, so every exit below releases what has been
-           * converted so far. free(NULL) is harmless.
-           */
-          kct = tct = NULL;
-          if (! KHS2ct(kinfo.ss, &kct))
-            { printf("[bad trusted structure]\n"); free(kct); goto CLEANUP; }
-          if (! KHS2ct(tinfo.ss, &tct))
-            { printf("[bad test structure]\n"); free(kct); free(tct); goto CLEANUP; }
-
-          /* Each ct array has one entry per character of its structure
-           * string, while the loop below walks kinfo.len positions of
-           * both; refuse inputs that would be read past their end.
-           */
-          if ((int) strlen(kinfo.ss) < kinfo.len || (int) strlen(tinfo.ss) < kinfo.len)
-            {
-              printf("[structure shorter than trusted sequence]\n");
-              free(kct);
-              free(tct);
-              goto CLEANUP;
-            }
-
-/* WriteCT(stdout, tseq, tct, tinfo.len); */
-/* WriteCT(stdout, tseq, kct, tinfo.len); */
-
-          correct   = falsepair = missedpair = 0;
-          dscorrect = sscorrect = 0;
-          for (pos = 0; pos < kinfo.len; pos++)
-            {
-              /* check if actual base pair is predicted */
-              if (kct[pos] >= 0 && kct[pos] == tct[pos])
-                correct++;
-              else if (kct[pos] >= 0)
-                missedpair++;
-
-              if (tct[pos] >= 0 && kct[pos] != tct[pos])
-                falsepair++;
-
-              /* 2 state prediction */
-              if (kct[pos] >= 0 && tct[pos] >= 0)
-                dscorrect++;
-              else if (kct[pos] < 0 && tct[pos] < 0)
-                sscorrect++;
-            }
-          nseq++;
-          tot_trusted   += correct + missedpair;
-          tot_predicted += correct + falsepair;
-          tot_correct   += correct;
-
-          tot_dscorrect += dscorrect;
-          tot_sscorrect += sscorrect;
-          tot_positions += kinfo.len;
-
-          /* print out per sequence info */
-          printf(" %d/%d trusted pairs predicted (%.2f%% sensitivity)\n",
-                 correct, correct+missedpair,
-                 100. * (float) correct/ (float) (correct + missedpair));
-          printf(" %d/%d predicted pairs correct (%.2f%% specificity)\n",
-                 correct, correct + falsepair,
-                 100. * (float) correct/ (float) (correct + falsepair));
-
-          printf(" Two state: %d/%d positions correctly predicted (%.2f%% accuracy)\n",
-                 dscorrect + sscorrect,
-                 kinfo.len,
-                 100. * (float) (dscorrect + sscorrect) / (float) kinfo.len);
-          puts("");
-
-
-          free(kct);
-          free(tct);
-        }
-
-    CLEANUP:
-      FreeSequence(kseq, &kinfo);
-      FreeSequence(tseq, &tinfo);
-    }
-
-  /* And the final summary:
-   */
-  puts("");
-  printf("Overall structure prediction accuracy (%d sequences, %d positions)\n",
-         nseq, tot_positions);
-  printf(" %d/%d trusted pairs predicted (%.2f%% sensitivity)\n",
-         tot_correct, tot_trusted,
-         100. * (float) tot_correct/ (float) tot_trusted);
-  printf(" %d/%d predicted pairs correct (%.2f%% specificity)\n",
-         tot_correct, tot_predicted,
-         100. * (float) tot_correct/ (float) tot_predicted);
-  printf(" Two state: %d/%d positions correctly predicted (%.2f%% accuracy)\n",
-         tot_dscorrect + tot_sscorrect, tot_positions,
-         100. * (float) (tot_dscorrect + tot_sscorrect) / (float) tot_positions);
-  puts("");
-
-  SeqfileClose(tfp);
-  SeqfileClose(kfp);
-  return 0;
-}
-
-
-/* Function: KHS2ct()
- *
- * Purpose:  Convert a secondary structure string to an array of integers
- *           representing what position each position is base-paired
- *           to (0..len-1), or -1 if none. This is off-by-one from a
- *           Zuker .ct file representation.
- *
- *           The .ct representation can accommodate pseudoknots but the
- *           secondary structure string cannot easily; the string contains
- *           "Aa", "Bb", etc. pairs as a limited representation of
- *           pseudoknots. The string contains "><" for base pairs.
- *           Gap characters (as judged by isgap()) mark unpaired
- *           positions; any other character makes the string bad.
- *
- *           Letters are recognized by explicit 'A'..'Z' / 'a'..'z'
- *           range tests rather than isupper()/islower(): those must
- *           not be given a negative char, and under a non-"C" locale
- *           they could accept characters that index past dolist[26].
- *
- * Args:     ss     - NUL-terminated secondary structure string
- *           ret_ct - RETURN: array with one entry per character of ss
- *
- * Return:   ret_ct is allocated here and must be free'd by caller.
- *           It is set even when the string is rejected, so the caller
- *           must free it in that case too.
- *           Returns 1 on success, 0 if ss is somehow inconsistent
- *           (closing symbol without an opener, opener never closed,
- *           or an unrecognized character).
- */
-static int
-KHS2ct(char *ss, int **ret_ct)
-{
-  struct intstack_s *dolist[27];   /* [0]: "><" pairs; [1..26]: "Aa".."Zz" */
-  int *ct;
-  int  i;
-  int  pos, pair;
-  int  status = 1;                 /* success or failure return status */
-  int  len;
-
-  for (i = 0; i < 27; i++)
-    dolist[i] = InitIntStack();
-  len = strlen(ss);
-
-  /* Ask for at least one element: malloc(0) is allowed to return NULL,
-   * which would be mistaken for an allocation failure on an empty string.
-   */
-  if ((ct = (int *) malloc ((len > 0 ? len : 1) * sizeof(int))) == NULL)
-    Die("malloc failed");
-  for (pos = 0; pos < len; pos++)
-    ct[pos] = -1;
-
-  for (pos = 0; ss[pos] != '\0'; pos++)
-    {
-      if (ss[pos] == '>')          /* left side of a pair: push onto stack 0 */
-        PushIntStack(dolist[0], pos);
-      else if (ss[pos] == '<')     /* right side of a pair; resolve pair */
-        {
-          if (! PopIntStack(dolist[0], &pair))
-            { status = 0; }
-          else
-            {
-              ct[pos]  = pair;
-              ct[pair] = pos;
-            }
-        }
-      /* same stuff for pseudoknots */
-      else if (ss[pos] >= 'A' && ss[pos] <= 'Z')
-        PushIntStack(dolist[ss[pos] - 'A' + 1], pos);
-      else if (ss[pos] >= 'a' && ss[pos] <= 'z')
-        {
-          if (! PopIntStack(dolist[ss[pos] - 'a' + 1], &pair))
-            { status = 0; }
-          else
-            {
-              ct[pos]  = pair;
-              ct[pair] = pos;
-            }
-        }
-      else if (!isgap(ss[pos])) status = 0; /* bad character */
-    }
-
-  /* a positive return from FreeIntStack() is treated as "openers were
-   * left unmatched on this stack" -- presumably it returns the number
-   * of elements still stored; confirm against the stack implementation
-   */
-  for (i = 0; i < 27; i++)
-    if ( FreeIntStack(dolist[i]) > 0)
-      status = 0;
-
-  *ret_ct = ct;
-  return status;
-}
-
-
-#ifdef SRE_REMOVED
-/* Function: WriteCT()
- *
- * Purpose:  Dump a structure in CT form, one line per position:
- *           the 1-based position, the residue, and the 1-based
- *           position of its pairing partner. Everything is shifted
- *           up by one on output, so a ct entry of -1 is written as
- *           partner 0.
- *
- * Args:     fp  - file to print to
- *           seq - sequence; seq[0..len-1] is read
- *           ct  - pairing partners; ct[0..len-1] is read
- *           len - number of positions to write
- */
-static void
-WriteCT(FILE *fp, char *seq, int *ct, int len)
-{
-  int i = 0;
-
-  while (i < len)
-    {
-      fprintf(fp, "%d %c %d\n", i+1, seq[i], ct[i]+1);
-      i++;
-    }
-}
-#endif
diff --git a/squid/config.sub b/squid/config.sub
deleted file mode 100644
index 578b302..0000000
--- a/squid/config.sub
+++ /dev/null
@@ -1,1375 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
-# Free Software Foundation, Inc.
-
-timestamp='2001-06-08'
-
-# This file is (in principle) common to ALL GNU software.
-# The presence of a machine in this file suggests that SOME GNU software
-# can handle that machine. It does not imply ALL GNU software can.
-#
-# This file is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330,
-# Boston, MA 02111-1307, USA.
-
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that program.
-
-# Please send patches to <config-patches at gnu.org>.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support. The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-# CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=`echo "$0" | sed -e 's,.*/,,'`
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS
- $0 [OPTION] ALIAS
-
-Canonicalize a configuration name.
-
-Operation modes:
- -h, --help print this help, then exit
- -t, --time-stamp print date of last modification, then exit
- -v, --version print version number, then exit
-
-Report bugs and patches to <config-patches at gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
-Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions. There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-#
-# Options are consumed until the first word that is not an option.
-# The informational options print to stdout and exit 0; every
-# diagnostic goes to stderr and exits 1, so that stdout carries
-# nothing but the canonical name.
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit 0 ;;
-    --version | -v )
-       echo "$version" ; exit 0 ;;
-    --help | --h* | -h )
-       echo "$usage"; exit 0 ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-
-    *local*)
-       # First pass through any local machine types.
-       # Quoted so the name is echoed back exactly as given.
-       echo "$1"
-       exit 0;;
-
-    * )
-       break ;;
-  esac
-done
-
-# Exactly one configuration name must remain.
-case $# in
- 0) echo "$me: missing argument$help" >&2
-    exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
-    exit 1;;
-esac
-
-# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
-# Here we must recognize all the valid KERNEL-OS combinations.
-#
-# maybe_os is the last two dash-separated words of the argument (or the
-# whole argument when it has fewer than three words). If that is one of
-# the known KERNEL-OS pairs, both words form the OS; otherwise only the
-# last word is split off as the OS, and os is left empty when the
-# argument contains no dash at all.
-# The argument is quoted throughout so that whitespace or glob
-# characters in it cannot split or expand.
-maybe_os=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
-case $maybe_os in
-  nto-qnx* | linux-gnu* | storm-chaos* | os2-emx* | windows32-*)
-    os=-$maybe_os
-    basic_machine=`echo "$1" | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
-    ;;
-  *)
-    basic_machine=`echo "$1" | sed 's/-[^-]*$//'`
-    if [ "$basic_machine" != "$1" ]
-    then os=`echo "$1" | sed 's/.*-/-/'`
-    else os=; fi
-    ;;
-esac
-
-### Let's recognize common machines as not being operating systems so
-### that things like config.sub decstation-3100 work. We also
-### recognize some manufacturers as not being operating systems, so we
-### can provide default operating systems below.
-###
-### At this point $os holds the last dash-separated word of the argument
-### (with its leading dash) and $basic_machine holds the rest. Branches
-### that decide the word was not an OS reset basic_machine to the whole
-### argument ($1).
-case $os in
- -sun*os*)
- # Prevent following clause from handling this invalid input.
- ;;
- # Machine or manufacturer names: not an OS, so clear os and treat the
- # whole argument as the machine.
- -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \
- -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \
- -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \
- -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
- -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
- -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis)
- os=
- basic_machine=$1
- ;;
- # Same treatment as the list above.
- -sim | -cisco | -oki | -wec | -winbond)
- os=
- basic_machine=$1
- ;;
- # Matched only so that it is left exactly as given.
- -scout)
- ;;
- # The next three also keep the whole argument as the machine, but
- # with a fixed OS name instead of an empty one.
- -wrs)
- os=-vxworks
- basic_machine=$1
- ;;
- -chorusos*)
- os=-chorusos
- basic_machine=$1
- ;;
- -chorusrdb)
- os=-chorusrdb
- basic_machine=$1
- ;;
- -hiux*)
- os=-hiuxwe2
- ;;
- # SCO, UDK and ISC: the sed rewrites everything from "86-" to the end
- # of the argument as "86-pc", so the machine becomes <cpu>86-pc.
- -sco5)
- os=-sco3.2v5
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco4)
- os=-sco3.2v4
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2.[4-9]*)
- os=`echo $os | sed -e 's/sco3.2./sco3.2v/'`
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -sco3.2v[4-9]*)
- # Don't forget version if it is 3.2v4 or newer.
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- # Any other sco spelling falls back to 3.2v2.
- -sco*)
- os=-sco3.2v2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -udk*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- # Bare "isc" gets a default version; versioned spellings are handled
- # by the -isc* branch further down and keep their os value.
- -isc)
- os=-isc2.2
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -clix*)
- basic_machine=clipper-intergraph
- ;;
- -isc*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
- ;;
- -lynx*)
- os=-lynxos
- ;;
- # As for SCO above, but the vendor becomes "sequent".
- -ptx*)
- basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'`
- ;;
- -windowsnt*)
- os=`echo $os | sed -e 's/windowsnt/winnt/'`
- ;;
- -psos*)
- os=-psos
- ;;
- # MiNT: the machine is forced to m68k-atari and any version suffix on
- # the OS name is dropped.
- -mint | -mint[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
-esac
-
-# Decode aliases for certain CPU-COMPANY combinations.
-case $basic_machine in
- # Recognize the basic CPU types without company name.
- # Some are omitted here because they have special meanings below.
- tahoe | i860 | ia64 | m32r | m68k | m68000 | m88k | ns32k | arc \
- | arm | arme[lb] | arm[bl]e | armv[2345] | armv[345][lb] | strongarm | xscale \
- | pyramid | mn10200 | mn10300 | tron | a29k \
- | 580 | i960 | h8300 \
- | x86 | ppcbe | mipsbe | mipsle | shbe | shle \
- | hppa | hppa1.0 | hppa1.1 | hppa2.0 | hppa2.0w | hppa2.0n \
- | hppa64 \
- | alpha | alphaev[4-8] | alphaev56 | alphapca5[67] \
- | alphaev6[78] \
- | we32k | ns16k | clipper | i370 | sh | sh[34] \
- | powerpc | powerpcle \
- | 1750a | dsp16xx | pdp10 | pdp11 \
- | mips16 | mips64 | mipsel | mips64el \
- | mips64orion | mips64orionel | mipstx39 | mipstx39el \
- | mips64vr4300 | mips64vr4300el | mips64vr4100 | mips64vr4100el \
- | mips64vr5000 | mips64vr5000el | mcore | s390 | s390x \
- | sparc | sparclet | sparclite | sparc64 | sparcv9 | sparcv9b \
- | v850 | c4x \
- | thumb | d10v | d30v | fr30 | avr | openrisc | tic80 \
- | pj | pjl | h8500 | z8k)
- basic_machine=$basic_machine-unknown
- ;;
- m6811 | m68hc11 | m6812 | m68hc12)
- # Motorola 68HC11/12.
- basic_machine=$basic_machine-unknown
- os=-none
- ;;
- m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k)
- ;;
-
- # We use `pc' rather than `unknown'
- # because (1) that's what they normally are, and
- # (2) the word "unknown" tends to confuse beginning users.
- i*86 | x86_64)
- basic_machine=$basic_machine-pc
- ;;
- # Object if more than one company name word.
- *-*-*)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
- # Recognize the basic CPU types with company name.
- # FIXME: clean up the formatting here.
- vax-* | tahoe-* | i*86-* | i860-* | ia64-* | m32r-* | m68k-* | m68000-* \
- | m88k-* | sparc-* | ns32k-* | fx80-* | arc-* | c[123]* \
- | arm-* | armbe-* | armle-* | armv*-* | strongarm-* | xscale-* \
- | mips-* | pyramid-* | tron-* | a29k-* | romp-* | rs6000-* \
- | power-* | none-* | 580-* | cray2-* | h8300-* | h8500-* | i960-* \
- | xmp-* | ymp-* \
- | x86-* | ppcbe-* | mipsbe-* | mipsle-* | shbe-* | shle-* \
- | hppa-* | hppa1.0-* | hppa1.1-* | hppa2.0-* | hppa2.0w-* \
- | hppa2.0n-* | hppa64-* \
- | alpha-* | alphaev[4-8]-* | alphaev56-* | alphapca5[67]-* \
- | alphaev6[78]-* \
- | we32k-* | cydra-* | ns16k-* | pn-* | np1-* | xps100-* \
- | clipper-* | orion-* \
- | sparclite-* | pdp10-* | pdp11-* | sh-* | sh[34]-* | sh[34]eb-* \
- | powerpc-* | powerpcle-* | sparc64-* | sparcv9-* | sparcv9b-* | sparc86x-* \
- | mips16-* | mips64-* | mipsel-* \
- | mips64el-* | mips64orion-* | mips64orionel-* \
- | mips64vr4100-* | mips64vr4100el-* | mips64vr4300-* | mips64vr4300el-* \
- | mipstx39-* | mipstx39el-* | mcore-* \
- | f30[01]-* | f700-* | s390-* | s390x-* | sv1-* | t3e-* \
- | [cjt]90-* \
- | m88110-* | m680[01234]0-* | m683?2-* | m68360-* | z8k-* | d10v-* \
- | thumb-* | v850-* | d30v-* | tic30-* | tic80-* | c30-* | fr30-* \
- | bs2000-* | tic54x-* | c54x-* | x86_64-* | pj-* | pjl-*)
- ;;
- # Recognize the various machine names and aliases which stand
- # for a CPU type and a company and sometimes even an OS.
- 386bsd)
- basic_machine=i386-unknown
- os=-bsd
- ;;
- 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
- basic_machine=m68000-att
- ;;
- 3b*)
- basic_machine=we32k-att
- ;;
- a29khif)
- basic_machine=a29k-amd
- os=-udi
- ;;
- adobe68k)
- basic_machine=m68010-adobe
- os=-scout
- ;;
- alliant | fx80)
- basic_machine=fx80-alliant
- ;;
- altos | altos3068)
- basic_machine=m68k-altos
- ;;
- am29k)
- basic_machine=a29k-none
- os=-bsd
- ;;
- amdahl)
- basic_machine=580-amdahl
- os=-sysv
- ;;
- amiga | amiga-*)
- basic_machine=m68k-unknown
- ;;
- amigaos | amigados)
- basic_machine=m68k-unknown
- os=-amigaos
- ;;
- amigaunix | amix)
- basic_machine=m68k-unknown
- os=-sysv4
- ;;
- apollo68)
- basic_machine=m68k-apollo
- os=-sysv
- ;;
- apollo68bsd)
- basic_machine=m68k-apollo
- os=-bsd
- ;;
- aux)
- basic_machine=m68k-apple
- os=-aux
- ;;
- balance)
- basic_machine=ns32k-sequent
- os=-dynix
- ;;
- convex-c1)
- basic_machine=c1-convex
- os=-bsd
- ;;
- convex-c2)
- basic_machine=c2-convex
- os=-bsd
- ;;
- convex-c32)
- basic_machine=c32-convex
- os=-bsd
- ;;
- convex-c34)
- basic_machine=c34-convex
- os=-bsd
- ;;
- convex-c38)
- basic_machine=c38-convex
- os=-bsd
- ;;
- cray | ymp)
- basic_machine=ymp-cray
- os=-unicos
- ;;
- cray2)
- basic_machine=cray2-cray
- os=-unicos
- ;;
- [cjt]90)
- basic_machine=${basic_machine}-cray
- os=-unicos
- ;;
- crds | unos)
- basic_machine=m68k-crds
- ;;
- cris | cris-* | etrax*)
- basic_machine=cris-axis
- ;;
- da30 | da30-*)
- basic_machine=m68k-da30
- ;;
- decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn)
- basic_machine=mips-dec
- ;;
- delta | 3300 | motorola-3300 | motorola-delta \
- | 3300-motorola | delta-motorola)
- basic_machine=m68k-motorola
- ;;
- delta88)
- basic_machine=m88k-motorola
- os=-sysv3
- ;;
- dpx20 | dpx20-*)
- basic_machine=rs6000-bull
- os=-bosx
- ;;
- dpx2* | dpx2*-bull)
- basic_machine=m68k-bull
- os=-sysv3
- ;;
- ebmon29k)
- basic_machine=a29k-amd
- os=-ebmon
- ;;
- elxsi)
- basic_machine=elxsi-elxsi
- os=-bsd
- ;;
- encore | umax | mmax)
- basic_machine=ns32k-encore
- ;;
- es1800 | OSE68k | ose68k | ose | OSE)
- basic_machine=m68k-ericsson
- os=-ose
- ;;
- fx2800)
- basic_machine=i860-alliant
- ;;
- genix)
- basic_machine=ns32k-ns
- ;;
- gmicro)
- basic_machine=tron-gmicro
- os=-sysv
- ;;
- go32)
- basic_machine=i386-pc
- os=-go32
- ;;
- h3050r* | hiux*)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- h8300hms)
- basic_machine=h8300-hitachi
- os=-hms
- ;;
- h8300xray)
- basic_machine=h8300-hitachi
- os=-xray
- ;;
- h8500hms)
- basic_machine=h8500-hitachi
- os=-hms
- ;;
- harris)
- basic_machine=m88k-harris
- os=-sysv3
- ;;
- hp300-*)
- basic_machine=m68k-hp
- ;;
- hp300bsd)
- basic_machine=m68k-hp
- os=-bsd
- ;;
- hp300hpux)
- basic_machine=m68k-hp
- os=-hpux
- ;;
- hp3k9[0-9][0-9] | hp9[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k2[0-9][0-9] | hp9k31[0-9])
- basic_machine=m68000-hp
- ;;
- hp9k3[2-9][0-9])
- basic_machine=m68k-hp
- ;;
- hp9k6[0-9][0-9] | hp6[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hp9k7[0-79][0-9] | hp7[0-79][0-9])
- basic_machine=hppa1.1-hp
- ;;
- hp9k78[0-9] | hp78[0-9])
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
- # FIXME: really hppa2.0-hp
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][13679] | hp8[0-9][13679])
- basic_machine=hppa1.1-hp
- ;;
- hp9k8[0-9][0-9] | hp8[0-9][0-9])
- basic_machine=hppa1.0-hp
- ;;
- hppa-next)
- os=-nextstep3
- ;;
- hppaosf)
- basic_machine=hppa1.1-hp
- os=-osf
- ;;
- hppro)
- basic_machine=hppa1.1-hp
- os=-proelf
- ;;
- i370-ibm* | ibm*)
- basic_machine=i370-ibm
- ;;
-# I'm not sure what "Sysv32" means. Should this be sysv3.2?
- i*86v32)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv32
- ;;
- i*86v4*)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv4
- ;;
- i*86v)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-sysv
- ;;
- i*86sol2)
- basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
- os=-solaris2
- ;;
- i386mach)
- basic_machine=i386-mach
- os=-mach
- ;;
- i386-vsta | vsta)
- basic_machine=i386-unknown
- os=-vsta
- ;;
- iris | iris4d)
- basic_machine=mips-sgi
- case $os in
- -irix*)
- ;;
- *)
- os=-irix4
- ;;
- esac
- ;;
- isi68 | isi)
- basic_machine=m68k-isi
- os=-sysv
- ;;
- m88k-omron*)
- basic_machine=m88k-omron
- ;;
- magnum | m3230)
- basic_machine=mips-mips
- os=-sysv
- ;;
- merlin)
- basic_machine=ns32k-utek
- os=-sysv
- ;;
- mingw32)
- basic_machine=i386-pc
- os=-mingw32
- ;;
- miniframe)
- basic_machine=m68000-convergent
- ;;
- *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*)
- basic_machine=m68k-atari
- os=-mint
- ;;
- mipsel*-linux*)
- basic_machine=mipsel-unknown
- os=-linux-gnu
- ;;
- mips*-linux*)
- basic_machine=mips-unknown
- os=-linux-gnu
- ;;
- mips3*-*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`
- ;;
- mips3*)
- basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown
- ;;
- mmix*)
- basic_machine=mmix-knuth
- os=-mmixware
- ;;
- monitor)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- msdos)
- basic_machine=i386-pc
- os=-msdos
- ;;
- mvs)
- basic_machine=i370-ibm
- os=-mvs
- ;;
- ncr3000)
- basic_machine=i486-ncr
- os=-sysv4
- ;;
- netbsd386)
- basic_machine=i386-unknown
- os=-netbsd
- ;;
- netwinder)
- basic_machine=armv4l-rebel
- os=-linux
- ;;
- news | news700 | news800 | news900)
- basic_machine=m68k-sony
- os=-newsos
- ;;
- news1000)
- basic_machine=m68030-sony
- os=-newsos
- ;;
- news-3600 | risc-news)
- basic_machine=mips-sony
- os=-newsos
- ;;
- necv70)
- basic_machine=v70-nec
- os=-sysv
- ;;
- next | m*-next )
- basic_machine=m68k-next
- case $os in
- -nextstep* )
- ;;
- -ns2*)
- os=-nextstep2
- ;;
- *)
- os=-nextstep3
- ;;
- esac
- ;;
- nh3000)
- basic_machine=m68k-harris
- os=-cxux
- ;;
- nh[45]000)
- basic_machine=m88k-harris
- os=-cxux
- ;;
- nindy960)
- basic_machine=i960-intel
- os=-nindy
- ;;
- mon960)
- basic_machine=i960-intel
- os=-mon960
- ;;
- nonstopux)
- basic_machine=mips-compaq
- os=-nonstopux
- ;;
- np1)
- basic_machine=np1-gould
- ;;
- nsr-tandem)
- basic_machine=nsr-tandem
- ;;
- op50n-* | op60c-*)
- basic_machine=hppa1.1-oki
- os=-proelf
- ;;
- OSE68000 | ose68000)
- basic_machine=m68000-ericsson
- os=-ose
- ;;
- os68k)
- basic_machine=m68k-none
- os=-os68k
- ;;
- pa-hitachi)
- basic_machine=hppa1.1-hitachi
- os=-hiuxwe2
- ;;
- paragon)
- basic_machine=i860-intel
- os=-osf
- ;;
- pbd)
- basic_machine=sparc-tti
- ;;
- pbb)
- basic_machine=m68k-tti
- ;;
- pc532 | pc532-*)
- basic_machine=ns32k-pc532
- ;;
- pentium | p5 | k5 | k6 | nexgen)
- basic_machine=i586-pc
- ;;
- pentiumpro | p6 | 6x86 | athlon)
- basic_machine=i686-pc
- ;;
- pentiumii | pentium2)
- basic_machine=i686-pc
- ;;
- pentium-* | p5-* | k5-* | k6-* | nexgen-*)
- basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumpro-* | p6-* | 6x86-* | athlon-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pentiumii-* | pentium2-*)
- basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- pn)
- basic_machine=pn-gould
- ;;
- power) basic_machine=power-ibm
- ;;
- ppc) basic_machine=powerpc-unknown
- ;;
- ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ppcle | powerpclittle | ppc-le | powerpc-little)
- basic_machine=powerpcle-unknown
- ;;
- ppcle-* | powerpclittle-*)
- basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'`
- ;;
- ps2)
- basic_machine=i386-ibm
- ;;
- pw32)
- basic_machine=i586-unknown
- os=-pw32
- ;;
- rom68k)
- basic_machine=m68k-rom68k
- os=-coff
- ;;
- rm[46]00)
- basic_machine=mips-siemens
- ;;
- rtpc | rtpc-*)
- basic_machine=romp-ibm
- ;;
- sa29200)
- basic_machine=a29k-amd
- os=-udi
- ;;
- sequent)
- basic_machine=i386-sequent
- ;;
- sh)
- basic_machine=sh-hitachi
- os=-hms
- ;;
- sparclite-wrs)
- basic_machine=sparclite-wrs
- os=-vxworks
- ;;
- sps7)
- basic_machine=m68k-bull
- os=-sysv2
- ;;
- spur)
- basic_machine=spur-unknown
- ;;
- st2000)
- basic_machine=m68k-tandem
- ;;
- stratus)
- basic_machine=i860-stratus
- os=-sysv4
- ;;
- sun2)
- basic_machine=m68000-sun
- ;;
- sun2os3)
- basic_machine=m68000-sun
- os=-sunos3
- ;;
- sun2os4)
- basic_machine=m68000-sun
- os=-sunos4
- ;;
- sun3os3)
- basic_machine=m68k-sun
- os=-sunos3
- ;;
- sun3os4)
- basic_machine=m68k-sun
- os=-sunos4
- ;;
- sun4os3)
- basic_machine=sparc-sun
- os=-sunos3
- ;;
- sun4os4)
- basic_machine=sparc-sun
- os=-sunos4
- ;;
- sun4sol2)
- basic_machine=sparc-sun
- os=-solaris2
- ;;
- sun3 | sun3-*)
- basic_machine=m68k-sun
- ;;
- sun4)
- basic_machine=sparc-sun
- ;;
- sun386 | sun386i | roadrunner)
- basic_machine=i386-sun
- ;;
- sv1)
- basic_machine=sv1-cray
- os=-unicos
- ;;
- symmetry)
- basic_machine=i386-sequent
- os=-dynix
- ;;
- t3e)
- basic_machine=t3e-cray
- os=-unicos
- ;;
- tic54x | c54x*)
- basic_machine=tic54x-unknown
- os=-coff
- ;;
- tx39)
- basic_machine=mipstx39-unknown
- ;;
- tx39el)
- basic_machine=mipstx39el-unknown
- ;;
- tower | tower-32)
- basic_machine=m68k-ncr
- ;;
- udi29k)
- basic_machine=a29k-amd
- os=-udi
- ;;
- ultra3)
- basic_machine=a29k-nyu
- os=-sym1
- ;;
- v810 | necv810)
- basic_machine=v810-nec
- os=-none
- ;;
- vaxv)
- basic_machine=vax-dec
- os=-sysv
- ;;
- vms)
- basic_machine=vax-dec
- os=-vms
- ;;
- vpp*|vx|vx-*)
- basic_machine=f301-fujitsu
- ;;
- vxworks960)
- basic_machine=i960-wrs
- os=-vxworks
- ;;
- vxworks68)
- basic_machine=m68k-wrs
- os=-vxworks
- ;;
- vxworks29k)
- basic_machine=a29k-wrs
- os=-vxworks
- ;;
- w65*)
- basic_machine=w65-wdc
- os=-none
- ;;
- w89k-*)
- basic_machine=hppa1.1-winbond
- os=-proelf
- ;;
- windows32)
- basic_machine=i386-pc
- os=-windows32-msvcrt
- ;;
- xmp)
- basic_machine=xmp-cray
- os=-unicos
- ;;
- xps | xps100)
- basic_machine=xps100-honeywell
- ;;
- z8k-*-coff)
- basic_machine=z8k-unknown
- os=-sim
- ;;
- none)
- basic_machine=none-none
- os=-none
- ;;
-
-# Here we handle the default manufacturer of certain CPU types. It is in
-# some cases the only manufacturer, in others, it is the most popular.
- w89k)
- basic_machine=hppa1.1-winbond
- ;;
- op50n)
- basic_machine=hppa1.1-oki
- ;;
- op60c)
- basic_machine=hppa1.1-oki
- ;;
- mips)
- if [ x$os = x-linux-gnu ]; then
- basic_machine=mips-unknown
- else
- basic_machine=mips-mips
- fi
- ;;
- romp)
- basic_machine=romp-ibm
- ;;
- rs6000)
- basic_machine=rs6000-ibm
- ;;
- vax)
- basic_machine=vax-dec
- ;;
- pdp10)
- # there are many clones, so DEC is not a safe bet
- basic_machine=pdp10-unknown
- ;;
- pdp11)
- basic_machine=pdp11-dec
- ;;
- we32k)
- basic_machine=we32k-att
- ;;
- sh3 | sh4)
- basic_machine=sh-unknown
- ;;
- sparc | sparcv9 | sparcv9b)
- basic_machine=sparc-sun
- ;;
- cydra)
- basic_machine=cydra-cydrome
- ;;
- orion)
- basic_machine=orion-highlevel
- ;;
- orion105)
- basic_machine=clipper-highlevel
- ;;
- mac | mpw | mac-mpw)
- basic_machine=m68k-apple
- ;;
- pmac | pmac-mpw)
- basic_machine=powerpc-apple
- ;;
- c4x*)
- basic_machine=c4x-none
- os=-coff
- ;;
- *-unknown)
- # Make sure to match an already-canonicalized machine name.
- ;;
- *)
- echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2
- exit 1
- ;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $basic_machine in
- *-digital*)
- basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'`
- ;;
- *-commodore*)
- basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'`
- ;;
- *)
- ;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if [ x"$os" != x"" ]
-then
-case $os in
- # First match some system type aliases
- # that might get confused with valid system types.
- # -solaris* is a basic system type, with this one exception.
- -solaris1 | -solaris1.*)
- os=`echo $os | sed -e 's|solaris1|sunos4|'`
- ;;
- -solaris)
- os=-solaris2
- ;;
- -svr4*)
- os=-sysv4
- ;;
- -unixware*)
- os=-sysv4.2uw
- ;;
- -gnu/linux*)
- os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'`
- ;;
- # First accept the basic system types.
- # The portable systems comes first.
- # Each alternative MUST END IN A *, to match a version number.
- # -sysv* is not here because it comes later, after sysvr4.
- -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \
- | -*vms* | -sco* | -esix* | -isc* | -aix* | -sunos | -sunos[34]*\
- | -hpux* | -unos* | -osf* | -luna* | -dgux* | -solaris* | -sym* \
- | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \
- | -aos* \
- | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
- | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
- | -hiux* | -386bsd* | -netbsd* | -openbsd* | -freebsd* | -riscix* \
- | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
- | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
- | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
- | -chorusos* | -chorusrdb* \
- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \
- | -interix* | -uwin* | -rhapsody* | -darwin* | -opened* \
- | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
- | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* | -os2*)
- # Remember, each alternative MUST END IN *, to match a version number.
- ;;
- -qnx*)
- case $basic_machine in
- x86-* | i*86-*)
- ;;
- *)
- os=-nto$os
- ;;
- esac
- ;;
- -nto*)
- os=-nto-qnx
- ;;
- -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \
- | -windows* | -osx | -abug | -netware* | -os9* | -beos* \
- | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*)
- ;;
- -mac*)
- os=`echo $os | sed -e 's|mac|macos|'`
- ;;
- -linux*)
- os=`echo $os | sed -e 's|linux|linux-gnu|'`
- ;;
- -sunos5*)
- os=`echo $os | sed -e 's|sunos5|solaris2|'`
- ;;
- -sunos6*)
- os=`echo $os | sed -e 's|sunos6|solaris3|'`
- ;;
- -opened*)
- os=-openedition
- ;;
- -wince*)
- os=-wince
- ;;
- -osfrose*)
- os=-osfrose
- ;;
- -osf*)
- os=-osf
- ;;
- -utek*)
- os=-bsd
- ;;
- -dynix*)
- os=-bsd
- ;;
- -acis*)
- os=-aos
- ;;
- -386bsd)
- os=-bsd
- ;;
- -ctix* | -uts*)
- os=-sysv
- ;;
- -ns2 )
- os=-nextstep2
- ;;
- -nsk*)
- os=-nsk
- ;;
- # Preserve the version number of sinix5.
- -sinix5.*)
- os=`echo $os | sed -e 's|sinix|sysv|'`
- ;;
- -sinix*)
- os=-sysv4
- ;;
- -triton*)
- os=-sysv3
- ;;
- -oss*)
- os=-sysv3
- ;;
- -svr4)
- os=-sysv4
- ;;
- -svr3)
- os=-sysv3
- ;;
- -sysvr4)
- os=-sysv4
- ;;
- # This must come after -sysvr4.
- -sysv*)
- ;;
- -ose*)
- os=-ose
- ;;
- -es1800*)
- os=-ose
- ;;
- -xenix)
- os=-xenix
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- os=-mint
- ;;
- -none)
- ;;
- *)
- # Get rid of the `-' at the beginning of $os.
- os=`echo $os | sed 's/[^-]*-//'`
- echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2
- exit 1
- ;;
-esac
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system. Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-case $basic_machine in
- *-acorn)
- os=-riscix1.2
- ;;
- arm*-rebel)
- os=-linux
- ;;
- arm*-semi)
- os=-aout
- ;;
- pdp10-*)
- os=-tops20
- ;;
- pdp11-*)
- os=-none
- ;;
- *-dec | vax-*)
- os=-ultrix4.2
- ;;
- m68*-apollo)
- os=-domain
- ;;
- i386-sun)
- os=-sunos4.0.2
- ;;
- m68000-sun)
- os=-sunos3
- # This also exists in the configure program, but was not the
- # default.
- # os=-sunos4
- ;;
- m68*-cisco)
- os=-aout
- ;;
- mips*-cisco)
- os=-elf
- ;;
- mips*-*)
- os=-elf
- ;;
- *-tti) # must be before sparc entry or we get the wrong os.
- os=-sysv3
- ;;
- sparc-* | *-sun)
- os=-sunos4.1.1
- ;;
- *-be)
- os=-beos
- ;;
- *-ibm)
- os=-aix
- ;;
- *-wec)
- os=-proelf
- ;;
- *-winbond)
- os=-proelf
- ;;
- *-oki)
- os=-proelf
- ;;
- *-hp)
- os=-hpux
- ;;
- *-hitachi)
- os=-hiux
- ;;
- i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
- os=-sysv
- ;;
- *-cbm)
- os=-amigaos
- ;;
- *-dg)
- os=-dgux
- ;;
- *-dolphin)
- os=-sysv3
- ;;
- m68k-ccur)
- os=-rtu
- ;;
- m88k-omron*)
- os=-luna
- ;;
- *-next )
- os=-nextstep
- ;;
- *-sequent)
- os=-ptx
- ;;
- *-crds)
- os=-unos
- ;;
- *-ns)
- os=-genix
- ;;
- i370-*)
- os=-mvs
- ;;
- *-next)
- os=-nextstep3
- ;;
- *-gould)
- os=-sysv
- ;;
- *-highlevel)
- os=-bsd
- ;;
- *-encore)
- os=-bsd
- ;;
- *-sgi)
- os=-irix
- ;;
- *-siemens)
- os=-sysv4
- ;;
- *-masscomp)
- os=-rtu
- ;;
- f30[01]-fujitsu | f700-fujitsu)
- os=-uxpv
- ;;
- *-rom68k)
- os=-coff
- ;;
- *-*bug)
- os=-coff
- ;;
- *-apple)
- os=-macos
- ;;
- *-atari*)
- os=-mint
- ;;
- *)
- os=-none
- ;;
-esac
-fi
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer. We pick the logical manufacturer.
-vendor=unknown
-case $basic_machine in
- *-unknown)
- case $os in
- -riscix*)
- vendor=acorn
- ;;
- -sunos*)
- vendor=sun
- ;;
- -aix*)
- vendor=ibm
- ;;
- -beos*)
- vendor=be
- ;;
- -hpux*)
- vendor=hp
- ;;
- -mpeix*)
- vendor=hp
- ;;
- -hiux*)
- vendor=hitachi
- ;;
- -unos*)
- vendor=crds
- ;;
- -dgux*)
- vendor=dg
- ;;
- -luna*)
- vendor=omron
- ;;
- -genix*)
- vendor=ns
- ;;
- -mvs* | -opened*)
- vendor=ibm
- ;;
- -ptx*)
- vendor=sequent
- ;;
- -vxsim* | -vxworks*)
- vendor=wrs
- ;;
- -aux*)
- vendor=apple
- ;;
- -hms*)
- vendor=hitachi
- ;;
- -mpw* | -macos*)
- vendor=apple
- ;;
- -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*)
- vendor=atari
- ;;
- esac
- basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"`
- ;;
-esac
-
-echo $basic_machine$os
-exit 0
-
-# Local variables:
-# eval: (add-hook 'write-file-hooks 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/squid/configure b/squid/configure
deleted file mode 100755
index f70158e..0000000
--- a/squid/configure
+++ /dev/null
@@ -1,8042 +0,0 @@
-#! /bin/sh
-# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.57 for SQUID 1.9g.
-#
-# Report bugs to <eddy at genetics.wustl.edu>.
-#
-# Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
-# Free Software Foundation, Inc.
-# This configure script is free software; the Free Software Foundation
-# gives unlimited permission to copy, distribute and modify it.
-## --------------------- ##
-## M4sh Initialization. ##
-## --------------------- ##
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
- set -o posix
-fi
-
-# Support unset when possible.
-if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
- as_unset=unset
-else
- as_unset=false
-fi
-
-
-# Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-for as_var in \
- LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
- LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
- LC_TELEPHONE LC_TIME
-do
- if (set +x; test -n "`(eval $as_var=C; export $as_var) 2>&1`"); then
- eval $as_var=C; export $as_var
- else
- $as_unset $as_var
- fi
-done
-
-# Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-
-# Name of the executable.
-as_me=`$as_basename "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)$' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
- /^X\/\(\/\/\)$/{ s//\1/; q; }
- /^X\/\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
-
-
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- echo "#! /bin/sh" >conf$$.sh
- echo "exit 0" >>conf$$.sh
- chmod +x conf$$.sh
- if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
- PATH_SEPARATOR=';'
- else
- PATH_SEPARATOR=:
- fi
- rm -f conf$$.sh
-fi
-
-
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" || {
- # Find who we are. Look in the path if we contain no path at all
- # relative or not.
- case $0 in
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
-
- ;;
- esac
- # We did not find ourselves, most probably we were run as `sh COMMAND'
- # in which case we are not to be found in the path.
- if test "x$as_myself" = x; then
- as_myself=$0
- fi
- if test ! -f "$as_myself"; then
- { echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2
- { (exit 1); exit 1; }; }
- fi
- case $CONFIG_SHELL in
- '')
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for as_base in sh bash ksh sh5; do
- case $as_dir in
- /*)
- if ("$as_dir/$as_base" -c '
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
- $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
- $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
- CONFIG_SHELL=$as_dir/$as_base
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$0" ${1+"$@"}
- fi;;
- esac
- done
-done
-;;
- esac
-
- # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
- # uniformly replaced by the line number. The first 'sed' inserts a
- # line-number line before each line; the second 'sed' does the real
- # work. The second script uses 'N' to pair each line-number line
- # with the numbered line, and appends trailing '-' during
- # substitution so that $LINENO is not a special case at line end.
- # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
- # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
- sed '=' <$as_myself |
- sed '
- N
- s,$,-,
- : loop
- s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
- t loop
- s,-$,,
- s,^['$as_cr_digits']*\n,,
- ' >$as_me.lineno &&
- chmod +x $as_me.lineno ||
- { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2
- { (exit 1); exit 1; }; }
-
- # Don't try to exec as it changes $[0], causing all sort of problems
- # (the dirname of $[0] is not the place where we might find the
- # original and so on. Autoconf is especially sensible to this).
- . ./$as_me.lineno
- # Exit status is that of the last command.
- exit
-}
-
-
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
- *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T=' ' ;;
- *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
- *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
-esac
-
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-rm -f conf$$ conf$$.exe conf$$.file
-echo >conf$$.file
-if ln -s conf$$.file conf$$ 2>/dev/null; then
- # We could just check for DJGPP; but this test a) works b) is more generic
- # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
- if test -f conf$$.exe; then
- # Don't use ln at all; we don't have any links
- as_ln_s='cp -p'
- else
- as_ln_s='ln -s'
- fi
-elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
-else
- as_ln_s='cp -p'
-fi
-rm -f conf$$ conf$$.exe conf$$.file
-
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p=:
-else
- as_mkdir_p=false
-fi
-
-as_executable_p="test -f"
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
-
-
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" $as_nl"
-
-# CDPATH.
-$as_unset CDPATH
-
-
-# Name of the host.
-# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
-# so uname gets run too.
-ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
-
-exec 6>&1
-
-#
-# Initializations.
-#
-ac_default_prefix=/usr/local
-ac_config_libobj_dir=.
-cross_compiling=no
-subdirs=
-MFLAGS=
-MAKEFLAGS=
-SHELL=${CONFIG_SHELL-/bin/sh}
-
-# Maximum number of lines to put in a shell here document.
-# This variable seems obsolete. It should probably be removed, and
-# only ac_max_sed_lines should be used.
-: ${ac_max_here_lines=38}
-
-# Identity of this package.
-PACKAGE_NAME='SQUID'
-PACKAGE_TARNAME='squid'
-PACKAGE_VERSION='1.9g'
-PACKAGE_STRING='SQUID 1.9g'
-PACKAGE_BUGREPORT='eddy at genetics.wustl.edu'
-
-# Factoring default headers for most tests.
-ac_includes_default="\
-#include <stdio.h>
-#if HAVE_SYS_TYPES_H
-# include <sys/types.h>
-#endif
-#if HAVE_SYS_STAT_H
-# include <sys/stat.h>
-#endif
-#if STDC_HEADERS
-# include <stdlib.h>
-# include <stddef.h>
-#else
-# if HAVE_STDLIB_H
-# include <stdlib.h>
-# endif
-#endif
-#if HAVE_STRING_H
-# if !STDC_HEADERS && HAVE_MEMORY_H
-# include <memory.h>
-# endif
-# include <string.h>
-#endif
-#if HAVE_STRINGS_H
-# include <strings.h>
-#endif
-#if HAVE_INTTYPES_H
-# include <inttypes.h>
-#else
-# if HAVE_STDINT_H
-# include <stdint.h>
-# endif
-#endif
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#endif"
-
-ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS SQUID_RELCODE SQUID_DATE SQUID_COPYRIGHT SQUID_LICENSE SQUID_LICENSETAG SQUID_VERSION CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct [...]
-ac_subst_files=''
-
-# Initialize some variables set by options.
-ac_init_help=
-ac_init_version=false
-# The variables have the same names as the options, with
-# dashes changed to underlines.
-cache_file=/dev/null
-exec_prefix=NONE
-no_create=
-no_recursion=
-prefix=NONE
-program_prefix=NONE
-program_suffix=NONE
-program_transform_name=s,x,x,
-silent=
-site=
-srcdir=
-verbose=
-x_includes=NONE
-x_libraries=NONE
-
-# Installation directory options.
-# These are left unexpanded so users can "make install exec_prefix=/foo"
-# and all the variables that are supposed to be based on exec_prefix
-# by default will actually change.
-# Use braces instead of parens because sh, perl, etc. also accept them.
-bindir='${exec_prefix}/bin'
-sbindir='${exec_prefix}/sbin'
-libexecdir='${exec_prefix}/libexec'
-datadir='${prefix}/share'
-sysconfdir='${prefix}/etc'
-sharedstatedir='${prefix}/com'
-localstatedir='${prefix}/var'
-libdir='${exec_prefix}/lib'
-includedir='${prefix}/include'
-oldincludedir='/usr/include'
-infodir='${prefix}/info'
-mandir='${prefix}/man'
-
-ac_prev=
-for ac_option
-do
- # If the previous option needs an argument, assign it.
- if test -n "$ac_prev"; then
- eval "$ac_prev=\$ac_option"
- ac_prev=
- continue
- fi
-
- ac_optarg=`expr "x$ac_option" : 'x[^=]*=\(.*\)'`
-
- # Accept the important Cygnus configure options, so we can diagnose typos.
-
- case $ac_option in
-
- -bindir | --bindir | --bindi | --bind | --bin | --bi)
- ac_prev=bindir ;;
- -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
- bindir=$ac_optarg ;;
-
- -build | --build | --buil | --bui | --bu)
- ac_prev=build_alias ;;
- -build=* | --build=* | --buil=* | --bui=* | --bu=*)
- build_alias=$ac_optarg ;;
-
- -cache-file | --cache-file | --cache-fil | --cache-fi \
- | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
- ac_prev=cache_file ;;
- -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
- | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
- cache_file=$ac_optarg ;;
-
- --config-cache | -C)
- cache_file=config.cache ;;
-
- -datadir | --datadir | --datadi | --datad | --data | --dat | --da)
- ac_prev=datadir ;;
- -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \
- | --da=*)
- datadir=$ac_optarg ;;
-
- -disable-* | --disable-*)
- ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid feature name: $ac_feature" >&2
- { (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/-/_/g'`
- eval "enable_$ac_feature=no" ;;
-
- -enable-* | --enable-*)
- ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid feature name: $ac_feature" >&2
- { (exit 1); exit 1; }; }
- ac_feature=`echo $ac_feature | sed 's/-/_/g'`
- case $ac_option in
- *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
- *) ac_optarg=yes ;;
- esac
- eval "enable_$ac_feature='$ac_optarg'" ;;
-
- -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
- | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
- | --exec | --exe | --ex)
- ac_prev=exec_prefix ;;
- -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
- | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
- | --exec=* | --exe=* | --ex=*)
- exec_prefix=$ac_optarg ;;
-
- -gas | --gas | --ga | --g)
- # Obsolete; use --with-gas.
- with_gas=yes ;;
-
- -help | --help | --hel | --he | -h)
- ac_init_help=long ;;
- -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
- ac_init_help=recursive ;;
- -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
- ac_init_help=short ;;
-
- -host | --host | --hos | --ho)
- ac_prev=host_alias ;;
- -host=* | --host=* | --hos=* | --ho=*)
- host_alias=$ac_optarg ;;
-
- -includedir | --includedir | --includedi | --included | --include \
- | --includ | --inclu | --incl | --inc)
- ac_prev=includedir ;;
- -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
- | --includ=* | --inclu=* | --incl=* | --inc=*)
- includedir=$ac_optarg ;;
-
- -infodir | --infodir | --infodi | --infod | --info | --inf)
- ac_prev=infodir ;;
- -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
- infodir=$ac_optarg ;;
-
- -libdir | --libdir | --libdi | --libd)
- ac_prev=libdir ;;
- -libdir=* | --libdir=* | --libdi=* | --libd=*)
- libdir=$ac_optarg ;;
-
- -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
- | --libexe | --libex | --libe)
- ac_prev=libexecdir ;;
- -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
- | --libexe=* | --libex=* | --libe=*)
- libexecdir=$ac_optarg ;;
-
- -localstatedir | --localstatedir | --localstatedi | --localstated \
- | --localstate | --localstat | --localsta | --localst \
- | --locals | --local | --loca | --loc | --lo)
- ac_prev=localstatedir ;;
- -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
- | --localstate=* | --localstat=* | --localsta=* | --localst=* \
- | --locals=* | --local=* | --loca=* | --loc=* | --lo=*)
- localstatedir=$ac_optarg ;;
-
- -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
- ac_prev=mandir ;;
- -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
- mandir=$ac_optarg ;;
-
- -nfp | --nfp | --nf)
- # Obsolete; use --without-fp.
- with_fp=no ;;
-
- -no-create | --no-create | --no-creat | --no-crea | --no-cre \
- | --no-cr | --no-c | -n)
- no_create=yes ;;
-
- -no-recursion | --no-recursion | --no-recursio | --no-recursi \
- | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
- no_recursion=yes ;;
-
- -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
- | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
- | --oldin | --oldi | --old | --ol | --o)
- ac_prev=oldincludedir ;;
- -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
- | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
- | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
- oldincludedir=$ac_optarg ;;
-
- -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
- ac_prev=prefix ;;
- -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
- prefix=$ac_optarg ;;
-
- -program-prefix | --program-prefix | --program-prefi | --program-pref \
- | --program-pre | --program-pr | --program-p)
- ac_prev=program_prefix ;;
- -program-prefix=* | --program-prefix=* | --program-prefi=* \
- | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
- program_prefix=$ac_optarg ;;
-
- -program-suffix | --program-suffix | --program-suffi | --program-suff \
- | --program-suf | --program-su | --program-s)
- ac_prev=program_suffix ;;
- -program-suffix=* | --program-suffix=* | --program-suffi=* \
- | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
- program_suffix=$ac_optarg ;;
-
- -program-transform-name | --program-transform-name \
- | --program-transform-nam | --program-transform-na \
- | --program-transform-n | --program-transform- \
- | --program-transform | --program-transfor \
- | --program-transfo | --program-transf \
- | --program-trans | --program-tran \
- | --progr-tra | --program-tr | --program-t)
- ac_prev=program_transform_name ;;
- -program-transform-name=* | --program-transform-name=* \
- | --program-transform-nam=* | --program-transform-na=* \
- | --program-transform-n=* | --program-transform-=* \
- | --program-transform=* | --program-transfor=* \
- | --program-transfo=* | --program-transf=* \
- | --program-trans=* | --program-tran=* \
- | --progr-tra=* | --program-tr=* | --program-t=*)
- program_transform_name=$ac_optarg ;;
-
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- silent=yes ;;
-
- -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
- ac_prev=sbindir ;;
- -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
- | --sbi=* | --sb=*)
- sbindir=$ac_optarg ;;
-
- -sharedstatedir | --sharedstatedir | --sharedstatedi \
- | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
- | --sharedst | --shareds | --shared | --share | --shar \
- | --sha | --sh)
- ac_prev=sharedstatedir ;;
- -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
- | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
- | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
- | --sha=* | --sh=*)
- sharedstatedir=$ac_optarg ;;
-
- -site | --site | --sit)
- ac_prev=site ;;
- -site=* | --site=* | --sit=*)
- site=$ac_optarg ;;
-
- -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
- ac_prev=srcdir ;;
- -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
- srcdir=$ac_optarg ;;
-
- -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
- | --syscon | --sysco | --sysc | --sys | --sy)
- ac_prev=sysconfdir ;;
- -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
- | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
- sysconfdir=$ac_optarg ;;
-
- -target | --target | --targe | --targ | --tar | --ta | --t)
- ac_prev=target_alias ;;
- -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
- target_alias=$ac_optarg ;;
-
- -v | -verbose | --verbose | --verbos | --verbo | --verb)
- verbose=yes ;;
-
- -version | --version | --versio | --versi | --vers | -V)
- ac_init_version=: ;;
-
- -with-* | --with-*)
- ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid package name: $ac_package" >&2
- { (exit 1); exit 1; }; }
- ac_package=`echo $ac_package| sed 's/-/_/g'`
- case $ac_option in
- *=*) ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`;;
- *) ac_optarg=yes ;;
- esac
- eval "with_$ac_package='$ac_optarg'" ;;
-
- -without-* | --without-*)
- ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'`
- # Reject names that are not valid shell variable names.
- expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid package name: $ac_package" >&2
- { (exit 1); exit 1; }; }
- ac_package=`echo $ac_package | sed 's/-/_/g'`
- eval "with_$ac_package=no" ;;
-
- --x)
- # Obsolete; use --with-x.
- with_x=yes ;;
-
- -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
- | --x-incl | --x-inc | --x-in | --x-i)
- ac_prev=x_includes ;;
- -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
- | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
- x_includes=$ac_optarg ;;
-
- -x-libraries | --x-libraries | --x-librarie | --x-librari \
- | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
- ac_prev=x_libraries ;;
- -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
- | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
- x_libraries=$ac_optarg ;;
-
- -*) { echo "$as_me: error: unrecognized option: $ac_option
-Try \`$0 --help' for more information." >&2
- { (exit 1); exit 1; }; }
- ;;
-
- *=*)
- ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
- # Reject names that are not valid shell variable names.
- expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null &&
- { echo "$as_me: error: invalid variable name: $ac_envvar" >&2
- { (exit 1); exit 1; }; }
- ac_optarg=`echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"`
- eval "$ac_envvar='$ac_optarg'"
- export $ac_envvar ;;
-
- *)
- # FIXME: should be removed in autoconf 3.0.
- echo "$as_me: WARNING: you should use --build, --host, --target" >&2
- expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
- echo "$as_me: WARNING: invalid host type: $ac_option" >&2
- : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
- ;;
-
- esac
-done
-
-if test -n "$ac_prev"; then
- ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- { echo "$as_me: error: missing argument to $ac_option" >&2
- { (exit 1); exit 1; }; }
-fi
-
-# Be sure to have absolute paths.
-for ac_var in exec_prefix prefix
-do
- eval ac_val=$`echo $ac_var`
- case $ac_val in
- [\\/$]* | ?:[\\/]* | NONE | '' ) ;;
- *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# Be sure to have absolute paths.
-for ac_var in bindir sbindir libexecdir datadir sysconfdir sharedstatedir \
- localstatedir libdir includedir oldincludedir infodir mandir
-do
- eval ac_val=$`echo $ac_var`
- case $ac_val in
- [\\/$]* | ?:[\\/]* ) ;;
- *) { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# There might be people who depend on the old broken behavior: `$host'
-# used to hold the argument of --host etc.
-# FIXME: To remove some day.
-build=$build_alias
-host=$host_alias
-target=$target_alias
-
-# FIXME: To remove some day.
-if test "x$host_alias" != x; then
- if test "x$build_alias" = x; then
- cross_compiling=maybe
- echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
- If a cross compiler is detected then cross compile mode will be used." >&2
- elif test "x$build_alias" != "x$host_alias"; then
- cross_compiling=yes
- fi
-fi
-
-ac_tool_prefix=
-test -n "$host_alias" && ac_tool_prefix=$host_alias-
-
-test "$silent" = yes && exec 6>/dev/null
-
-
-# Find the source files, if location was not specified.
-if test -z "$srcdir"; then
- ac_srcdir_defaulted=yes
- # Try the directory containing this script, then its parent.
- ac_confdir=`(dirname "$0") 2>/dev/null ||
-$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$0" : 'X\(//\)[^/]' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$0" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- srcdir=$ac_confdir
- if test ! -r $srcdir/$ac_unique_file; then
- srcdir=..
- fi
-else
- ac_srcdir_defaulted=no
-fi
-if test ! -r $srcdir/$ac_unique_file; then
- if test "$ac_srcdir_defaulted" = yes; then
- { echo "$as_me: error: cannot find sources ($ac_unique_file) in $ac_confdir or .." >&2
- { (exit 1); exit 1; }; }
- else
- { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2
- { (exit 1); exit 1; }; }
- fi
-fi
-(cd $srcdir && test -r ./$ac_unique_file) 2>/dev/null ||
- { echo "$as_me: error: sources are in $srcdir, but \`cd $srcdir' does not work" >&2
- { (exit 1); exit 1; }; }
-srcdir=`echo "$srcdir" | sed 's%\([^\\/]\)[\\/]*$%\1%'`
-ac_env_build_alias_set=${build_alias+set}
-ac_env_build_alias_value=$build_alias
-ac_cv_env_build_alias_set=${build_alias+set}
-ac_cv_env_build_alias_value=$build_alias
-ac_env_host_alias_set=${host_alias+set}
-ac_env_host_alias_value=$host_alias
-ac_cv_env_host_alias_set=${host_alias+set}
-ac_cv_env_host_alias_value=$host_alias
-ac_env_target_alias_set=${target_alias+set}
-ac_env_target_alias_value=$target_alias
-ac_cv_env_target_alias_set=${target_alias+set}
-ac_cv_env_target_alias_value=$target_alias
-ac_env_CC_set=${CC+set}
-ac_env_CC_value=$CC
-ac_cv_env_CC_set=${CC+set}
-ac_cv_env_CC_value=$CC
-ac_env_CFLAGS_set=${CFLAGS+set}
-ac_env_CFLAGS_value=$CFLAGS
-ac_cv_env_CFLAGS_set=${CFLAGS+set}
-ac_cv_env_CFLAGS_value=$CFLAGS
-ac_env_LDFLAGS_set=${LDFLAGS+set}
-ac_env_LDFLAGS_value=$LDFLAGS
-ac_cv_env_LDFLAGS_set=${LDFLAGS+set}
-ac_cv_env_LDFLAGS_value=$LDFLAGS
-ac_env_CPPFLAGS_set=${CPPFLAGS+set}
-ac_env_CPPFLAGS_value=$CPPFLAGS
-ac_cv_env_CPPFLAGS_set=${CPPFLAGS+set}
-ac_cv_env_CPPFLAGS_value=$CPPFLAGS
-ac_env_CPP_set=${CPP+set}
-ac_env_CPP_value=$CPP
-ac_cv_env_CPP_set=${CPP+set}
-ac_cv_env_CPP_value=$CPP
-
-#
-# Report the --help message.
-#
-if test "$ac_init_help" = "long"; then
- # Omit some internal or obsolete options to make the list less imposing.
- # This message is too long to be a string in the A/UX 3.1 sh.
- cat <<_ACEOF
-\`configure' configures SQUID 1.9g to adapt to many kinds of systems.
-
-Usage: $0 [OPTION]... [VAR=VALUE]...
-
-To assign environment variables (e.g., CC, CFLAGS...), specify them as
-VAR=VALUE. See below for descriptions of some of the useful variables.
-
-Defaults for the options are specified in brackets.
-
-Configuration:
- -h, --help display this help and exit
- --help=short display options specific to this package
- --help=recursive display the short help of all the included packages
- -V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking...' messages
- --cache-file=FILE cache test results in FILE [disabled]
- -C, --config-cache alias for \`--cache-file=config.cache'
- -n, --no-create do not create output files
- --srcdir=DIR find the sources in DIR [configure dir or \`..']
-
-_ACEOF
-
- cat <<_ACEOF
-Installation directories:
- --prefix=PREFIX install architecture-independent files in PREFIX
- [$ac_default_prefix]
- --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX
- [PREFIX]
-
-By default, \`make install' will install all the files in
-\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify
-an installation prefix other than \`$ac_default_prefix' using \`--prefix',
-for instance \`--prefix=\$HOME'.
-
-For better control, use the options below.
-
-Fine tuning of the installation directories:
- --bindir=DIR user executables [EPREFIX/bin]
- --sbindir=DIR system admin executables [EPREFIX/sbin]
- --libexecdir=DIR program executables [EPREFIX/libexec]
- --datadir=DIR read-only architecture-independent data [PREFIX/share]
- --sysconfdir=DIR read-only single-machine data [PREFIX/etc]
- --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com]
- --localstatedir=DIR modifiable single-machine data [PREFIX/var]
- --libdir=DIR object code libraries [EPREFIX/lib]
- --includedir=DIR C header files [PREFIX/include]
- --oldincludedir=DIR C header files for non-gcc [/usr/include]
- --infodir=DIR info documentation [PREFIX/info]
- --mandir=DIR man documentation [PREFIX/man]
-_ACEOF
-
- cat <<\_ACEOF
-_ACEOF
-fi
-
-if test -n "$ac_init_help"; then
- case $ac_init_help in
- short | recursive ) echo "Configuration of SQUID 1.9g:";;
- esac
- cat <<\_ACEOF
-
-Optional Features:
- --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no)
- --enable-FEATURE[=ARG] include FEATURE [ARG=yes]
- --enable-ccmalloc turn ccmalloc memory debugging on
- --enable-debugging set CFLAGS for debugging
- --enable-debugging=x also set diagnostics level to <x> (1-3) (3 = most verbose)
- --enable-lfs enable LFS, Large File Support
- --enable-pvm enable PVM, Parallel Virtual Machine
-
-Some influential environment variables:
- CC C compiler command
- CFLAGS C compiler flags
- LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
- nonstandard directory <lib dir>
- CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
- headers in a nonstandard directory <include dir>
- CPP C preprocessor
-
-Use these variables to override the choices made by `configure' or to help
-it to find libraries and programs with nonstandard names/locations.
-
-Report bugs to <eddy at genetics.wustl.edu>.
-_ACEOF
-fi
-
-if test "$ac_init_help" = "recursive"; then
- # If there are subdirs, report their specific --help.
- ac_popdir=`pwd`
- for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
- test -d $ac_dir || continue
- ac_builddir=.
-
-if test "$ac_dir" != .; then
- ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
- # A "../" for each directory in $ac_dir_suffix.
- ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
- ac_dir_suffix= ac_top_builddir=
-fi
-
-case $srcdir in
- .) # No --srcdir option. We are building in place.
- ac_srcdir=.
- if test -z "$ac_top_builddir"; then
- ac_top_srcdir=.
- else
- ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
- fi ;;
- [\\/]* | ?:[\\/]* ) # Absolute path.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir ;;
- *) # Relative path.
- ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
-# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
-# absolute.
-ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
-ac_abs_top_builddir=`cd "$ac_dir" && cd ${ac_top_builddir}. && pwd`
-ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
-ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
-
- cd $ac_dir
- # Check for guested configure; otherwise get Cygnus style configure.
- if test -f $ac_srcdir/configure.gnu; then
- echo
- $SHELL $ac_srcdir/configure.gnu --help=recursive
- elif test -f $ac_srcdir/configure; then
- echo
- $SHELL $ac_srcdir/configure --help=recursive
- elif test -f $ac_srcdir/configure.ac ||
- test -f $ac_srcdir/configure.in; then
- echo
- $ac_configure --help
- else
- echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2
- fi
- cd $ac_popdir
- done
-fi
-
-test -n "$ac_init_help" && exit 0
-if $ac_init_version; then
- cat <<\_ACEOF
-SQUID configure 1.9g
-generated by GNU Autoconf 2.57
-
-Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002
-Free Software Foundation, Inc.
-This configure script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it.
-_ACEOF
- exit 0
-fi
-exec 5>config.log
-cat >&5 <<_ACEOF
-This file contains any messages produced by compilers while
-running configure, to aid debugging if configure makes a mistake.
-
-It was created by SQUID $as_me 1.9g, which was
-generated by GNU Autoconf 2.57. Invocation command line was
-
- $ $0 $@
-
-_ACEOF
-{
-cat <<_ASUNAME
-## --------- ##
-## Platform. ##
-## --------- ##
-
-hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
-uname -m = `(uname -m) 2>/dev/null || echo unknown`
-uname -r = `(uname -r) 2>/dev/null || echo unknown`
-uname -s = `(uname -s) 2>/dev/null || echo unknown`
-uname -v = `(uname -v) 2>/dev/null || echo unknown`
-
-/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
-/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown`
-
-/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown`
-/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown`
-/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
-hostinfo = `(hostinfo) 2>/dev/null || echo unknown`
-/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown`
-/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown`
-/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown`
-
-_ASUNAME
-
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- echo "PATH: $as_dir"
-done
-
-} >&5
-
-cat >&5 <<_ACEOF
-
-
-## ----------- ##
-## Core tests. ##
-## ----------- ##
-
-_ACEOF
-
-
-# Keep a trace of the command line.
-# Strip out --no-create and --no-recursion so they do not pile up.
-# Strip out --silent because we don't want to record it for future runs.
-# Also quote any args containing shell meta-characters.
-# Make two passes to allow for proper duplicate-argument suppression.
-ac_configure_args=
-ac_configure_args0=
-ac_configure_args1=
-ac_sep=
-ac_must_keep_next=false
-for ac_pass in 1 2
-do
- for ac_arg
- do
- case $ac_arg in
- -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil)
- continue ;;
- *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
- ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
- esac
- case $ac_pass in
- 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;;
- 2)
- ac_configure_args1="$ac_configure_args1 '$ac_arg'"
- if test $ac_must_keep_next = true; then
- ac_must_keep_next=false # Got value, back to normal.
- else
- case $ac_arg in
- *=* | --config-cache | -C | -disable-* | --disable-* \
- | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
- | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
- | -with-* | --with-* | -without-* | --without-* | --x)
- case "$ac_configure_args0 " in
- "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
- esac
- ;;
- -* ) ac_must_keep_next=true ;;
- esac
- fi
- ac_configure_args="$ac_configure_args$ac_sep'$ac_arg'"
- # Get rid of the leading space.
- ac_sep=" "
- ;;
- esac
- done
-done
-$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; }
-$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; }
-
-# When interrupted or exit'd, cleanup temporary files, and complete
-# config.log. We remove comments because anyway the quotes in there
-# would cause problems or look ugly.
-# WARNING: Be sure not to use single quotes in there, as some shells,
-# such as our DU 5.0 friend, will then `close' the trap.
-trap 'exit_status=$?
- # Save into config.log some information that might help in debugging.
- {
- echo
-
- cat <<\_ASBOX
-## ---------------- ##
-## Cache variables. ##
-## ---------------- ##
-_ASBOX
- echo
- # The following way of writing the cache mishandles newlines in values,
-{
- (set) 2>&1 |
- case `(ac_space='"'"' '"'"'; set | grep ac_space) 2>&1` in
- *ac_space=\ *)
- sed -n \
- "s/'"'"'/'"'"'\\\\'"'"''"'"'/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='"'"'\\2'"'"'/p"
- ;;
- *)
- sed -n \
- "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
- ;;
- esac;
-}
- echo
-
- cat <<\_ASBOX
-## ----------------- ##
-## Output variables. ##
-## ----------------- ##
-_ASBOX
- echo
- for ac_var in $ac_subst_vars
- do
- eval ac_val=$`echo $ac_var`
- echo "$ac_var='"'"'$ac_val'"'"'"
- done | sort
- echo
-
- if test -n "$ac_subst_files"; then
- cat <<\_ASBOX
-## ------------- ##
-## Output files. ##
-## ------------- ##
-_ASBOX
- echo
- for ac_var in $ac_subst_files
- do
- eval ac_val=$`echo $ac_var`
- echo "$ac_var='"'"'$ac_val'"'"'"
- done | sort
- echo
- fi
-
- if test -s confdefs.h; then
- cat <<\_ASBOX
-## ----------- ##
-## confdefs.h. ##
-## ----------- ##
-_ASBOX
- echo
- sed "/^$/d" confdefs.h | sort
- echo
- fi
- test "$ac_signal" != 0 &&
- echo "$as_me: caught signal $ac_signal"
- echo "$as_me: exit $exit_status"
- } >&5
- rm -f core core.* *.core &&
- rm -rf conftest* confdefs* conf$$* $ac_clean_files &&
- exit $exit_status
- ' 0
-for ac_signal in 1 2 13 15; do
- trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal
-done
-ac_signal=0
-
-# confdefs.h avoids OS command line length limits that DEFS can exceed.
-rm -rf conftest* confdefs.h
-# AIX cpp loses on an empty file, so make sure it contains at least a newline.
-echo >confdefs.h
-
-# Predefined preprocessor variables.
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_NAME "$PACKAGE_NAME"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_TARNAME "$PACKAGE_TARNAME"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_VERSION "$PACKAGE_VERSION"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_STRING "$PACKAGE_STRING"
-_ACEOF
-
-
-cat >>confdefs.h <<_ACEOF
-#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT"
-_ACEOF
-
-
-# Let the site file select an alternate cache file if it wants to.
-# Prefer explicitly selected file to automatically selected ones.
-if test -z "$CONFIG_SITE"; then
- if test "x$prefix" != xNONE; then
- CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site"
- else
- CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site"
- fi
-fi
-for ac_site_file in $CONFIG_SITE; do
- if test -r "$ac_site_file"; then
- { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5
-echo "$as_me: loading site script $ac_site_file" >&6;}
- sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file"
- fi
-done
-
-if test -r "$cache_file"; then
- # Some versions of bash will fail to source /dev/null (special
- # files actually), so we avoid doing that.
- if test -f "$cache_file"; then
- { echo "$as_me:$LINENO: loading cache $cache_file" >&5
-echo "$as_me: loading cache $cache_file" >&6;}
- case $cache_file in
- [\\/]* | ?:[\\/]* ) . $cache_file;;
- *) . ./$cache_file;;
- esac
- fi
-else
- { echo "$as_me:$LINENO: creating cache $cache_file" >&5
-echo "$as_me: creating cache $cache_file" >&6;}
- >$cache_file
-fi
-
-# Check that the precious variables saved in the cache have kept the same
-# value.
-ac_cache_corrupted=false
-for ac_var in `(set) 2>&1 |
- sed -n 's/^ac_env_\([a-zA-Z_0-9]*\)_set=.*/\1/p'`; do
- eval ac_old_set=\$ac_cv_env_${ac_var}_set
- eval ac_new_set=\$ac_env_${ac_var}_set
- eval ac_old_val="\$ac_cv_env_${ac_var}_value"
- eval ac_new_val="\$ac_env_${ac_var}_value"
- case $ac_old_set,$ac_new_set in
- set,)
- { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5
-echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,set)
- { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5
-echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
- ac_cache_corrupted=: ;;
- ,);;
- *)
- if test "x$ac_old_val" != "x$ac_new_val"; then
- { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5
-echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;}
- { echo "$as_me:$LINENO: former value: $ac_old_val" >&5
-echo "$as_me: former value: $ac_old_val" >&2;}
- { echo "$as_me:$LINENO: current value: $ac_new_val" >&5
-echo "$as_me: current value: $ac_new_val" >&2;}
- ac_cache_corrupted=:
- fi;;
- esac
- # Pass precious variables to config.status.
- if test "$ac_new_set" = set; then
- case $ac_new_val in
- *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?\"\']*)
- ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;;
- *) ac_arg=$ac_var=$ac_new_val ;;
- esac
- case " $ac_configure_args " in
- *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy.
- *) ac_configure_args="$ac_configure_args '$ac_arg'" ;;
- esac
- fi
-done
-if $ac_cache_corrupted; then
- { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5
-echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5
-echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{ echo "$as_me:$LINENO: configuring the SQUID library for your system." >&5
-echo "$as_me: configuring the SQUID library for your system." >&6;}
-
-SQUID_RELCODE="squid1_9g"
-SQUID_DATE="January 2003"
-SQUID_COPYRIGHT="Copyright (C) 1992-2003 HHMI/Washington University School of Medicine"
-SQUID_LICENSE="Freely distributed under the GNU General Public License (GPL)"
-SQUID_LICENSETAG=gnu
-SQUID_VERSION=$PACKAGE_VERSION
-
-# Make output variables.
-
-
-
-
-
-
-
-# Make preprocessor symbols.
-cat >>confdefs.h <<_ACEOF
-#define SQUID_DATE "$SQUID_DATE"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define SQUID_COPYRIGHT "$SQUID_COPYRIGHT"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define SQUID_LICENSE "$SQUID_LICENSE"
-_ACEOF
-
-cat >>confdefs.h <<_ACEOF
-#define SQUID_VERSION "$SQUID_VERSION"
-_ACEOF
-
-
-
-# Checks for programs.
-#
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args.
-set dummy ${ac_tool_prefix}gcc; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="${ac_tool_prefix}gcc"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- echo "$as_me:$LINENO: result: $CC" >&5
-echo "${ECHO_T}$CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$ac_cv_prog_CC"; then
- ac_ct_CC=$CC
- # Extract the first word of "gcc", so it can be a program name with args.
-set dummy gcc; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_CC"; then
- ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CC="gcc"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-ac_ct_CC=$ac_cv_prog_ac_ct_CC
-if test -n "$ac_ct_CC"; then
- echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
-echo "${ECHO_T}$ac_ct_CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- CC=$ac_ct_CC
-else
- CC="$ac_cv_prog_CC"
-fi
-
-if test -z "$CC"; then
- if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args.
-set dummy ${ac_tool_prefix}cc; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="${ac_tool_prefix}cc"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- echo "$as_me:$LINENO: result: $CC" >&5
-echo "${ECHO_T}$CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$ac_cv_prog_CC"; then
- ac_ct_CC=$CC
- # Extract the first word of "cc", so it can be a program name with args.
-set dummy cc; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_CC"; then
- ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CC="cc"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-ac_ct_CC=$ac_cv_prog_ac_ct_CC
-if test -n "$ac_ct_CC"; then
- echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
-echo "${ECHO_T}$ac_ct_CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- CC=$ac_ct_CC
-else
- CC="$ac_cv_prog_CC"
-fi
-
-fi
-if test -z "$CC"; then
- # Extract the first word of "cc", so it can be a program name with args.
-set dummy cc; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
- ac_prog_rejected=no
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
- ac_prog_rejected=yes
- continue
- fi
- ac_cv_prog_CC="cc"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-if test $ac_prog_rejected = yes; then
- # We found a bogon in the path, so make sure we never use it.
- set dummy $ac_cv_prog_CC
- shift
- if test $# != 0; then
- # We chose a different compiler from the bogus one.
- # However, it has the same basename, so the bogon will be chosen
- # first if we set CC to just the basename; use the full file name.
- shift
- ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@"
- fi
-fi
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- echo "$as_me:$LINENO: result: $CC" >&5
-echo "${ECHO_T}$CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$CC"; then
- if test -n "$ac_tool_prefix"; then
- for ac_prog in cl
- do
- # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
-set dummy $ac_tool_prefix$ac_prog; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$CC"; then
- ac_cv_prog_CC="$CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-CC=$ac_cv_prog_CC
-if test -n "$CC"; then
- echo "$as_me:$LINENO: result: $CC" >&5
-echo "${ECHO_T}$CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- test -n "$CC" && break
- done
-fi
-if test -z "$CC"; then
- ac_ct_CC=$CC
- for ac_prog in cl
-do
- # Extract the first word of "$ac_prog", so it can be a program name with args.
-set dummy $ac_prog; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_CC"; then
- ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_CC="$ac_prog"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-ac_ct_CC=$ac_cv_prog_ac_ct_CC
-if test -n "$ac_ct_CC"; then
- echo "$as_me:$LINENO: result: $ac_ct_CC" >&5
-echo "${ECHO_T}$ac_ct_CC" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- test -n "$ac_ct_CC" && break
-done
-
- CC=$ac_ct_CC
-fi
-
-fi
-
-
-test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH
-See \`config.log' for more details." >&5
-echo "$as_me: error: no acceptable C compiler found in \$PATH
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-
-# Provide some information about the compiler.
-echo "$as_me:$LINENO:" \
- "checking for C compiler version" >&5
-ac_compiler=`set X $ac_compile; echo $2`
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler --version </dev/null >&5\"") >&5
- (eval $ac_compiler --version </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler -v </dev/null >&5\"") >&5
- (eval $ac_compiler -v </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-{ (eval echo "$as_me:$LINENO: \"$ac_compiler -V </dev/null >&5\"") >&5
- (eval $ac_compiler -V </dev/null >&5) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files a.out a.exe b.out"
-# Try to create an executable without -o first, disregard a.out.
-# It will help us diagnose broken compilers, and finding out an intuition
-# of exeext.
-echo "$as_me:$LINENO: checking for C compiler default output" >&5
-echo $ECHO_N "checking for C compiler default output... $ECHO_C" >&6
-ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
-if { (eval echo "$as_me:$LINENO: \"$ac_link_default\"") >&5
- (eval $ac_link_default) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- # Find the output, starting from the most likely. This scheme is
-# not robust to junk in `.', hence go to wildcards (a.*) only as a last
-# resort.
-
-# Be careful to initialize this variable, since it used to be cached.
-# Otherwise an old cache value of `no' led to `EXEEXT = no' in a Makefile.
-ac_cv_exeext=
-# b.out is created by i960 compilers.
-for ac_file in a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out
-do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj )
- ;;
- conftest.$ac_ext )
- # This is the source file.
- ;;
- [ab].out )
- # We found the default executable, but exeext='' is most
- # certainly right.
- break;;
- *.* )
- ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- # FIXME: I believe we export ac_cv_exeext for Libtool,
- # but it would be cool to find out if it's true. Does anybody
- # maintain Libtool? --akim.
- export ac_cv_exeext
- break;;
- * )
- break;;
- esac
-done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { echo "$as_me:$LINENO: error: C compiler cannot create executables
-See \`config.log' for more details." >&5
-echo "$as_me: error: C compiler cannot create executables
-See \`config.log' for more details." >&2;}
- { (exit 77); exit 77; }; }
-fi
-
-ac_exeext=$ac_cv_exeext
-echo "$as_me:$LINENO: result: $ac_file" >&5
-echo "${ECHO_T}$ac_file" >&6
-
-# Check the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-echo "$as_me:$LINENO: checking whether the C compiler works" >&5
-echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6
-# FIXME: These cross compiler hacks should be removed for Autoconf 3.0
-# If not cross compiling, check that we can run a simple program.
-if test "$cross_compiling" != yes; then
- if { ac_try='./$ac_file'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- cross_compiling=no
- else
- if test "$cross_compiling" = maybe; then
- cross_compiling=yes
- else
- { { echo "$as_me:$LINENO: error: cannot run C compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run C compiled programs.
-If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
- fi
- fi
-fi
-echo "$as_me:$LINENO: result: yes" >&5
-echo "${ECHO_T}yes" >&6
-
-rm -f a.out a.exe conftest$ac_cv_exeext b.out
-ac_clean_files=$ac_clean_files_save
-# Check the compiler produces executables we can run. If not, either
-# the compiler is broken, or we cross compile.
-echo "$as_me:$LINENO: checking whether we are cross compiling" >&5
-echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6
-echo "$as_me:$LINENO: result: $cross_compiling" >&5
-echo "${ECHO_T}$cross_compiling" >&6
-
-echo "$as_me:$LINENO: checking for suffix of executables" >&5
-echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- # If both `conftest.exe' and `conftest' are `present' (well, observable)
-# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will
-# work properly (i.e., refer to `conftest.exe'), while it won't with
-# `rm'.
-for ac_file in conftest.exe conftest conftest.*; do
- test -f "$ac_file" || continue
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.o | *.obj ) ;;
- *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
- export ac_cv_exeext
- break;;
- * ) break;;
- esac
-done
-else
- { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-rm -f conftest$ac_cv_exeext
-echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5
-echo "${ECHO_T}$ac_cv_exeext" >&6
-
-rm -f conftest.$ac_ext
-EXEEXT=$ac_cv_exeext
-ac_exeext=$EXEEXT
-echo "$as_me:$LINENO: checking for suffix of object files" >&5
-echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6
-if test "${ac_cv_objext+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.o conftest.obj
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; then
- for ac_file in `(ls conftest.o conftest.obj; ls conftest.*) 2>/dev/null`; do
- case $ac_file in
- *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg ) ;;
- *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
- break;;
- esac
-done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-rm -f conftest.$ac_cv_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_objext" >&5
-echo "${ECHO_T}$ac_cv_objext" >&6
-OBJEXT=$ac_cv_objext
-ac_objext=$OBJEXT
-echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5
-echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6
-if test "${ac_cv_c_compiler_gnu+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-#ifndef __GNUC__
- choke me
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_compiler_gnu=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_compiler_gnu=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-ac_cv_c_compiler_gnu=$ac_compiler_gnu
-
-fi
-echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5
-echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6
-GCC=`test $ac_compiler_gnu = yes && echo yes`
-ac_test_CFLAGS=${CFLAGS+set}
-ac_save_CFLAGS=$CFLAGS
-CFLAGS="-g"
-echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5
-echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6
-if test "${ac_cv_prog_cc_g+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_prog_cc_g=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_prog_cc_g=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5
-echo "${ECHO_T}$ac_cv_prog_cc_g" >&6
-if test "$ac_test_CFLAGS" = set; then
- CFLAGS=$ac_save_CFLAGS
-elif test $ac_cv_prog_cc_g = yes; then
- if test "$GCC" = yes; then
- CFLAGS="-g -O2"
- else
- CFLAGS="-g"
- fi
-else
- if test "$GCC" = yes; then
- CFLAGS="-O2"
- else
- CFLAGS=
- fi
-fi
-echo "$as_me:$LINENO: checking for $CC option to accept ANSI C" >&5
-echo $ECHO_N "checking for $CC option to accept ANSI C... $ECHO_C" >&6
-if test "${ac_cv_prog_cc_stdc+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_cv_prog_cc_stdc=no
-ac_save_CC=$CC
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdarg.h>
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
-struct buf { int x; };
-FILE * (*rcsopen) (struct buf *, struct stat *, int);
-static char *e (p, i)
- char **p;
- int i;
-{
- return p[i];
-}
-static char *f (char * (*g) (char **, int), char **p, ...)
-{
- char *s;
- va_list v;
- va_start (v,p);
- s = g (p, va_arg (v,int));
- va_end (v);
- return s;
-}
-int test (int i, double x);
-struct s1 {int (*f) (int a);};
-struct s2 {int (*f) (double a);};
-int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int);
-int argc;
-char **argv;
-int
-main ()
-{
-return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1];
- ;
- return 0;
-}
-_ACEOF
-# Don't try gcc -ansi; that turns off useful extensions and
-# breaks some systems' header files.
-# AIX -qlanglvl=ansi
-# Ultrix and OSF/1 -std1
-# HP-UX 10.20 and later -Ae
-# HP-UX older versions -Aa -D_HPUX_SOURCE
-# SVR4 -Xc -D__EXTENSIONS__
-for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__"
-do
- CC="$ac_save_CC $ac_arg"
- rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_prog_cc_stdc=$ac_arg
-break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext
-done
-rm -f conftest.$ac_ext conftest.$ac_objext
-CC=$ac_save_CC
-
-fi
-
-case "x$ac_cv_prog_cc_stdc" in
- x|xno)
- echo "$as_me:$LINENO: result: none needed" >&5
-echo "${ECHO_T}none needed" >&6 ;;
- *)
- echo "$as_me:$LINENO: result: $ac_cv_prog_cc_stdc" >&5
-echo "${ECHO_T}$ac_cv_prog_cc_stdc" >&6
- CC="$CC $ac_cv_prog_cc_stdc" ;;
-esac
-
-# Some people use a C++ compiler to compile C. Since we use `exit',
-# in C++ we need to declare it. In case someone uses the same compiler
-# for both compiling C and C++ we need to have the C++ compiler decide
-# the declaration of exit, since it's the most demanding environment.
-cat >conftest.$ac_ext <<_ACEOF
-#ifndef __cplusplus
- choke me
-#endif
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- for ac_declaration in \
- ''\
- '#include <stdlib.h>' \
- 'extern "C" void std::exit (int) throw (); using std::exit;' \
- 'extern "C" void std::exit (int); using std::exit;' \
- 'extern "C" void exit (int) throw ();' \
- 'extern "C" void exit (int);' \
- 'void exit (int);'
-do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-$ac_declaration
-int
-main ()
-{
-exit (42);
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-continue
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_declaration
-int
-main ()
-{
-exit (42);
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-rm -f conftest*
-if test -n "$ac_declaration"; then
- echo '#ifdef __cplusplus' >>confdefs.h
- echo $ac_declaration >>confdefs.h
- echo '#endif' >>confdefs.h
-fi
-
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-if test -n "$ac_tool_prefix"; then
- # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args.
-set dummy ${ac_tool_prefix}ranlib; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_RANLIB+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$RANLIB"; then
- ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
-fi
-fi
-RANLIB=$ac_cv_prog_RANLIB
-if test -n "$RANLIB"; then
- echo "$as_me:$LINENO: result: $RANLIB" >&5
-echo "${ECHO_T}$RANLIB" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-fi
-if test -z "$ac_cv_prog_RANLIB"; then
- ac_ct_RANLIB=$RANLIB
- # Extract the first word of "ranlib", so it can be a program name with args.
-set dummy ranlib; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test -n "$ac_ct_RANLIB"; then
- ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
-else
-as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_prog_ac_ct_RANLIB="ranlib"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
- test -z "$ac_cv_prog_ac_ct_RANLIB" && ac_cv_prog_ac_ct_RANLIB=":"
-fi
-fi
-ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
-if test -n "$ac_ct_RANLIB"; then
- echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5
-echo "${ECHO_T}$ac_ct_RANLIB" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
- RANLIB=$ac_ct_RANLIB
-else
- RANLIB="$ac_cv_prog_RANLIB"
-fi
-
-# Extract the first word of "ar", so it can be a program name with args.
-set dummy ar; ac_word=$2
-echo "$as_me:$LINENO: checking for $ac_word" >&5
-echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6
-if test "${ac_cv_path_AR+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- case $AR in
- [\\/]* | ?:[\\/]*)
- ac_cv_path_AR="$AR" # Let the user override the test with a path.
- ;;
- *)
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-as_dummy="$PATH:/usr/ccs/bin:/usr/xpg4/bin"
-for as_dir in $as_dummy
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for ac_exec_ext in '' $ac_executable_extensions; do
- if $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
- ac_cv_path_AR="$as_dir/$ac_word$ac_exec_ext"
- echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5
- break 2
- fi
-done
-done
-
- test -z "$ac_cv_path_AR" && ac_cv_path_AR=":"
- ;;
-esac
-fi
-AR=$ac_cv_path_AR
-
-if test -n "$AR"; then
- echo "$as_me:$LINENO: result: $AR" >&5
-echo "${ECHO_T}$AR" >&6
-else
- echo "$as_me:$LINENO: result: no" >&5
-echo "${ECHO_T}no" >&6
-fi
-
-
-# GNU make check; format of makefile dependency lines for executables.
-# original from John Darrington <j.darrington at elvis.murdoch.edu.au>
-# w/ heavy modifications.
-#
-# We need this because GNU make and SYSV make use different systems
-# specifying variables for dependencies: $$@ in sysv, %: %.o in GNU.
-# Would love to hear a better way of doing this.
-#
-# I use two different conventions in my Makefiles. Sometimes
-# executable "foo" has a file "foo.c" - this is the HMMER convention.
-# Sometimes executable "foo" has a file "foo_main.c" - this is
-# the SQUID convention. The configure script sets the
-# EXEC_DEPENDENCY appropriately: here, HMMER style.
-#
-# This creates a function CHECK_GNU_MAKE, which we immediately call.
-# It sets an output variable EXEC_DEPENDENCY.
-# This is used in the src/Makefile.in.
-#
-
-
- echo "$as_me:$LINENO: checking whether your make is GNU make" >&5
-echo $ECHO_N "checking whether your make is GNU make... $ECHO_C" >&6
- foundGNUmake='nope, assuming sysv make.' ;
- EXEC_DEPENDENCY=\$\$\@_main.o ;
- if ( make --version nothing 2> /dev/null | grep GNU > /dev/null ) ; then
- foundGNUmake='yes, it is.' ;
- EXEC_DEPENDENCY='%: %_main.o' ;
- fi
- echo "$as_me:$LINENO: result: $foundGNUmake" >&5
-echo "${ECHO_T}$foundGNUmake" >&6
-
-
-
-
-# ================================================================
-# Provide for unsigned integers of known size
-# Sets SQD_UINT16, 32, and 64.
-# Will substitute for @SQD_UINT16@, etc. in an output file (config.h)
-# Substitutes "FIXME" if no appropriate typedef is found.
-# This info is substituted in squid.h.
-# SRE, Thu Dec 28 13:58:51 2000
-#
-
-echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5
-echo $ECHO_N "checking whether byte ordering is bigendian... $ECHO_C" >&6
-if test "${ac_cv_c_bigendian+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- # See if sys/param.h defines the BYTE_ORDER macro.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <sys/types.h>
-#include <sys/param.h>
-
-int
-main ()
-{
-#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN
- bogus endian macros
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- # It does; now see whether it defined to BIG_ENDIAN or not.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <sys/types.h>
-#include <sys/param.h>
-
-int
-main ()
-{
-#if BYTE_ORDER != BIG_ENDIAN
- not big endian
-#endif
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_c_bigendian=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_c_bigendian=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-# It does not; compile a test program.
-if test "$cross_compiling" = yes; then
- # try to guess the endianness by grepping values into an object file
- ac_cv_c_bigendian=unknown
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-short ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
-short ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
-void _ascii () { char *s = (char *) ascii_mm; s = (char *) ascii_ii; }
-short ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
-short ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
-void _ebcdic () { char *s = (char *) ebcdic_mm; s = (char *) ebcdic_ii; }
-int
-main ()
-{
- _ascii (); _ebcdic ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then
- ac_cv_c_bigendian=yes
-fi
-if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
- if test "$ac_cv_c_bigendian" = unknown; then
- ac_cv_c_bigendian=no
- else
- # finding both strings is unlikely to happen, but who knows?
- ac_cv_c_bigendian=unknown
- fi
-fi
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-int
-main ()
-{
- /* Are we little or big endian? From Harbison&Steele. */
- union
- {
- long l;
- char c[sizeof (long)];
- } u;
- u.l = 1;
- exit (u.c[sizeof (long) - 1] == 1);
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_c_bigendian=no
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-ac_cv_c_bigendian=yes
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_c_bigendian" >&5
-echo "${ECHO_T}$ac_cv_c_bigendian" >&6
-case $ac_cv_c_bigendian in
- yes)
-
-cat >>confdefs.h <<\_ACEOF
-#define WORDS_BIGENDIAN 1
-_ACEOF
- ;;
- no)
- ;;
- *)
- { { echo "$as_me:$LINENO: error: unknown endianness
-presetting ac_cv_c_bigendian=no (or yes) will help" >&5
-echo "$as_me: error: unknown endianness
-presetting ac_cv_c_bigendian=no (or yes) will help" >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-
-
-for ac_func in ntohs
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-else
-
-echo "$as_me:$LINENO: checking for ntohs in -lsocket" >&5
-echo $ECHO_N "checking for ntohs in -lsocket... $ECHO_C" >&6
-if test "${ac_cv_lib_socket_ntohs+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lsocket $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char ntohs ();
-int
-main ()
-{
-ntohs ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_lib_socket_ntohs=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_lib_socket_ntohs=no
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-echo "$as_me:$LINENO: result: $ac_cv_lib_socket_ntohs" >&5
-echo "${ECHO_T}$ac_cv_lib_socket_ntohs" >&6
-if test $ac_cv_lib_socket_ntohs = yes; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBSOCKET 1
-_ACEOF
-
- LIBS="-lsocket $LIBS"
-
-fi
-
-fi
-done
-
-
-for ac_func in ntohl
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-else
-
-echo "$as_me:$LINENO: checking for ntohl in -lsocket" >&5
-echo $ECHO_N "checking for ntohl in -lsocket... $ECHO_C" >&6
-if test "${ac_cv_lib_socket_ntohl+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lsocket $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char ntohl ();
-int
-main ()
-{
-ntohl ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_lib_socket_ntohl=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_lib_socket_ntohl=no
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-echo "$as_me:$LINENO: result: $ac_cv_lib_socket_ntohl" >&5
-echo "${ECHO_T}$ac_cv_lib_socket_ntohl" >&6
-if test $ac_cv_lib_socket_ntohl = yes; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBSOCKET 1
-_ACEOF
-
- LIBS="-lsocket $LIBS"
-
-fi
-
-fi
-done
-
-
-for ac_func in htons
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-else
-
-echo "$as_me:$LINENO: checking for htons in -lsocket" >&5
-echo $ECHO_N "checking for htons in -lsocket... $ECHO_C" >&6
-if test "${ac_cv_lib_socket_htons+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lsocket $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char htons ();
-int
-main ()
-{
-htons ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_lib_socket_htons=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_lib_socket_htons=no
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-echo "$as_me:$LINENO: result: $ac_cv_lib_socket_htons" >&5
-echo "${ECHO_T}$ac_cv_lib_socket_htons" >&6
-if test $ac_cv_lib_socket_htons = yes; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBSOCKET 1
-_ACEOF
-
- LIBS="-lsocket $LIBS"
-
-fi
-
-fi
-done
-
-
-for ac_func in htonl
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-else
-
-echo "$as_me:$LINENO: checking for htonl in -lsocket" >&5
-echo $ECHO_N "checking for htonl in -lsocket... $ECHO_C" >&6
-if test "${ac_cv_lib_socket_htonl+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lsocket $LIBS"
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char htonl ();
-int
-main ()
-{
-htonl ();
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_lib_socket_htonl=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_lib_socket_htonl=no
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-echo "$as_me:$LINENO: result: $ac_cv_lib_socket_htonl" >&5
-echo "${ECHO_T}$ac_cv_lib_socket_htonl" >&6
-if test $ac_cv_lib_socket_htonl = yes; then
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBSOCKET 1
-_ACEOF
-
- LIBS="-lsocket $LIBS"
-
-fi
-
-fi
-done
-
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5
-echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6
-# On Suns, sometimes $CPP names a directory.
-if test -n "$CPP" && test -d "$CPP"; then
- CPP=
-fi
-if test -z "$CPP"; then
- if test "${ac_cv_prog_CPP+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- # Double quotes because CPP needs to be expanded
- for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp"
- do
- ac_preproc_ok=false
-for ac_c_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether non-existent headers
- # can be detected and how.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- # Broken: success on invalid input.
-continue
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then
- break
-fi
-
- done
- ac_cv_prog_CPP=$CPP
-
-fi
- CPP=$ac_cv_prog_CPP
-else
- ac_cv_prog_CPP=$CPP
-fi
-echo "$as_me:$LINENO: result: $CPP" >&5
-echo "${ECHO_T}$CPP" >&6
-ac_preproc_ok=false
-for ac_c_preproc_warn_flag in '' yes
-do
- # Use a header file that comes with gcc, so configuring glibc
- # with a fresh cross-compiler works.
- # Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- # <limits.h> exists even on freestanding compilers.
- # On the NeXT, cc -E runs the code through the compiler's parser,
- # not just through cpp. "Syntax error" is here to catch this case.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
- Syntax error
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- :
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Broken: fails on valid input.
-continue
-fi
-rm -f conftest.err conftest.$ac_ext
-
- # OK, works on sane cases. Now check whether non-existent headers
- # can be detected and how.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ac_nonexistent.h>
-_ACEOF
-if { (eval echo "$as_me:$LINENO: \"$ac_cpp conftest.$ac_ext\"") >&5
- (eval $ac_cpp conftest.$ac_ext) 2>conftest.er1
- ac_status=$?
- grep -v '^ *+' conftest.er1 >conftest.err
- rm -f conftest.er1
- cat conftest.err >&5
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } >/dev/null; then
- if test -s conftest.err; then
- ac_cpp_err=$ac_c_preproc_warn_flag
- else
- ac_cpp_err=
- fi
-else
- ac_cpp_err=yes
-fi
-if test -z "$ac_cpp_err"; then
- # Broken: success on invalid input.
-continue
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- # Passes both tests.
-ac_preproc_ok=:
-break
-fi
-rm -f conftest.err conftest.$ac_ext
-
-done
-# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
-if $ac_preproc_ok; then
- :
-else
- { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details." >&5
-echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-
-ac_ext=c
-ac_cpp='$CPP $CPPFLAGS'
-ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
-ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
-ac_compiler_gnu=$ac_cv_c_compiler_gnu
-
-
-echo "$as_me:$LINENO: checking for egrep" >&5
-echo $ECHO_N "checking for egrep... $ECHO_C" >&6
-if test "${ac_cv_prog_egrep+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if echo a | (grep -E '(a|b)') >/dev/null 2>&1
- then ac_cv_prog_egrep='grep -E'
- else ac_cv_prog_egrep='egrep'
- fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_prog_egrep" >&5
-echo "${ECHO_T}$ac_cv_prog_egrep" >&6
- EGREP=$ac_cv_prog_egrep
-
-
-echo "$as_me:$LINENO: checking for ANSI C header files" >&5
-echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6
-if test "${ac_cv_header_stdc+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_header_stdc=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_header_stdc=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
- # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then
- :
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then
- :
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then
- :
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <ctype.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
- (('a' <= (c) && (c) <= 'i') \
- || ('j' <= (c) && (c) <= 'r') \
- || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
- int i;
- for (i = 0; i < 256; i++)
- if (XOR (islower (i), ISLOWER (i))
- || toupper (i) != TOUPPER (i))
- exit(2);
- exit (0);
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- :
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-ac_cv_header_stdc=no
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
-echo "${ECHO_T}$ac_cv_header_stdc" >&6
-if test $ac_cv_header_stdc = yes; then
-
-cat >>confdefs.h <<\_ACEOF
-#define STDC_HEADERS 1
-_ACEOF
-
-fi
-
-# On IRIX 5.3, sys/types and inttypes.h are conflicting.
-
-
-
-
-
-
-
-
-
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
- inttypes.h stdint.h unistd.h
-do
-as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_header" >&5
-echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6
-if eval "test \"\${$as_ac_Header+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-
-#include <$ac_header>
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_Header=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_Header=no"
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_Header'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_Header'}'`" >&6
-if test `eval echo '${'$as_ac_Header'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
-done
-
-
-echo "$as_me:$LINENO: checking for unsigned short" >&5
-echo $ECHO_N "checking for unsigned short... $ECHO_C" >&6
-if test "${ac_cv_type_unsigned_short+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((unsigned short *) 0)
- return 0;
-if (sizeof (unsigned short))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_unsigned_short=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_unsigned_short=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_unsigned_short" >&5
-echo "${ECHO_T}$ac_cv_type_unsigned_short" >&6
-
-echo "$as_me:$LINENO: checking size of unsigned short" >&5
-echo $ECHO_N "checking size of unsigned short... $ECHO_C" >&6
-if test "${ac_cv_sizeof_unsigned_short+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_unsigned_short" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned short))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned short))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned short))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned short))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned short))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_unsigned_short=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned short), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned short), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (unsigned short)); }
-unsigned long ulongval () { return (long) (sizeof (unsigned short)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (unsigned short))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (unsigned short))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (unsigned short))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_unsigned_short=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned short), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned short), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_unsigned_short=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_unsigned_short" >&5
-echo "${ECHO_T}$ac_cv_sizeof_unsigned_short" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_UNSIGNED_SHORT $ac_cv_sizeof_unsigned_short
-_ACEOF
-
-
-echo "$as_me:$LINENO: checking for unsigned int" >&5
-echo $ECHO_N "checking for unsigned int... $ECHO_C" >&6
-if test "${ac_cv_type_unsigned_int+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((unsigned int *) 0)
- return 0;
-if (sizeof (unsigned int))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_unsigned_int=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_unsigned_int=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_unsigned_int" >&5
-echo "${ECHO_T}$ac_cv_type_unsigned_int" >&6
-
-echo "$as_me:$LINENO: checking size of unsigned int" >&5
-echo $ECHO_N "checking size of unsigned int... $ECHO_C" >&6
-if test "${ac_cv_sizeof_unsigned_int+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_unsigned_int" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned int))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned int))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned int))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned int))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned int))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_unsigned_int=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned int), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned int), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (unsigned int)); }
-unsigned long ulongval () { return (long) (sizeof (unsigned int)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (unsigned int))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (unsigned int))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (unsigned int))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_unsigned_int=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned int), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned int), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_unsigned_int=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_unsigned_int" >&5
-echo "${ECHO_T}$ac_cv_sizeof_unsigned_int" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_UNSIGNED_INT $ac_cv_sizeof_unsigned_int
-_ACEOF
-
-
-echo "$as_me:$LINENO: checking for unsigned long" >&5
-echo $ECHO_N "checking for unsigned long... $ECHO_C" >&6
-if test "${ac_cv_type_unsigned_long+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((unsigned long *) 0)
- return 0;
-if (sizeof (unsigned long))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_unsigned_long=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_unsigned_long=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_unsigned_long" >&5
-echo "${ECHO_T}$ac_cv_type_unsigned_long" >&6
-
-echo "$as_me:$LINENO: checking size of unsigned long" >&5
-echo $ECHO_N "checking size of unsigned long... $ECHO_C" >&6
-if test "${ac_cv_sizeof_unsigned_long+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_unsigned_long" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_unsigned_long=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned long), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned long), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (unsigned long)); }
-unsigned long ulongval () { return (long) (sizeof (unsigned long)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (unsigned long))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (unsigned long))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (unsigned long))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_unsigned_long=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned long), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned long), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_unsigned_long=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_unsigned_long" >&5
-echo "${ECHO_T}$ac_cv_sizeof_unsigned_long" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long
-_ACEOF
-
-
-echo "$as_me:$LINENO: checking for unsigned long long" >&5
-echo $ECHO_N "checking for unsigned long long... $ECHO_C" >&6
-if test "${ac_cv_type_unsigned_long_long+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((unsigned long long *) 0)
- return 0;
-if (sizeof (unsigned long long))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_unsigned_long_long=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_unsigned_long_long=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_unsigned_long_long" >&5
-echo "${ECHO_T}$ac_cv_type_unsigned_long_long" >&6
-
-echo "$as_me:$LINENO: checking size of unsigned long long" >&5
-echo $ECHO_N "checking size of unsigned long long... $ECHO_C" >&6
-if test "${ac_cv_sizeof_unsigned_long_long+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_unsigned_long_long" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long long))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long long))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long long))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long long))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (unsigned long long))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_unsigned_long_long=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned long long), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned long long), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (unsigned long long)); }
-unsigned long ulongval () { return (long) (sizeof (unsigned long long)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (unsigned long long))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (unsigned long long))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (unsigned long long))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_unsigned_long_long=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (unsigned long long), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (unsigned long long), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_unsigned_long_long=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_unsigned_long_long" >&5
-echo "${ECHO_T}$ac_cv_sizeof_unsigned_long_long" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_UNSIGNED_LONG_LONG $ac_cv_sizeof_unsigned_long_long
-_ACEOF
-
-
-
-
-for ac_func in strtoul strtoull
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-done
-
-if test "$ac_cv_sizeof_unsigned_short" = "2"; then
- SQD_UINT16="unsigned short "
-else
- SQD_UINT16="FIXME"
- { echo "$as_me:$LINENO: WARNING: \"No 16-bit int? Manually edit config file to typedef sqd_uint16.\"" >&5
-echo "$as_me: WARNING: \"No 16-bit int? Manually edit config file to typedef sqd_uint16.\"" >&2;}
-fi
-if test "$ac_cv_sizeof_unsigned_int" = "4"; then
- SQD_UINT32="unsigned int "
-elif test "$ac_cv_sizeof_unsigned_long" = "4"; then
- SQD_UINT32="unsigned long "
-else
- SQD_UINT32="FIXME"
- { echo "$as_me:$LINENO: WARNING: \"No 32-bit int? Manually edit config file to typedef sqd_uint32.\"" >&5
-echo "$as_me: WARNING: \"No 32-bit int? Manually edit config file to typedef sqd_uint32.\"" >&2;}
-fi
-if test "$ac_cv_sizeof_unsigned_long" = "8"; then
- SQD_UINT64="unsigned long "
-elif test "$ac_cv_sizeof_unsigned_long_long" = "8"; then
- SQD_UINT64="unsigned long long"
-else
- SQD_UINT64="FIXME"
- { echo "$as_me:$LINENO: WARNING: \"No 64-bit int? Manually edit config file to typedef sqd_uint64.\"" >&5
-echo "$as_me: WARNING: \"No 64-bit int? Manually edit config file to typedef sqd_uint64.\"" >&2;}
-fi
-
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-# ================================================================
-# Test for whether we can cheat and treat fpos_t
-# (used by fgetpos() and fsetpos()) as an arithmetic datatype.
-# This is essential if we're to be able to save an fpos_t to a file
-# in an architecture-neutral format. We need this on FreeBSD
-# systems, which don't provide a 64-bit ftell64() or ftello(),
-# so we have to use fgetpos().
-#
-# if true, squidconf.h will contain
-# #define ARITHMETIC_FPOS_T 1
-# else if false,
-# #undef ARITHMETIC_FPOS_T
-
-
- echo "$as_me:$LINENO: checking whether fpos_t is an arithmetic datatype" >&5
-echo $ECHO_N "checking whether fpos_t is an arithmetic datatype... $ECHO_C" >&6
- fpos_arithmetic="no."
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <stdio.h>
-int
-main ()
-{
-int main(void) { fpos_t f1, f2; if (f1 == f2) f1 = 0;}
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- cat >>confdefs.h <<\_ACEOF
-#define ARITHMETIC_FPOS_T 1
-_ACEOF
-
- fpos_arithmetic="yes."
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- echo "$as_me:$LINENO: result: $fpos_arithmetic" >&5
-echo "${ECHO_T}$fpos_arithmetic" >&6
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-# ================================================================
-# Tests for 64-bit file offset functions
-# Note: only AC_CHECK_FUNCS seems to properly define HAVE_FOO?
-#
-
-for ac_func in strtoull
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-done
-
-
-
-for ac_func in ftello fseeko
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-done
-
-
-
-for ac_func in ftello64 fseeko64
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-done
-
-
-
-for ac_func in ftell64 fseek64
-do
-as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh`
-echo "$as_me:$LINENO: checking for $ac_func" >&5
-echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6
-if eval "test \"\${$as_ac_var+set}\" = set"; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char $ac_func (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char $ac_func ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_$ac_func) || defined (__stub___$ac_func)
-choke me
-#else
-char (*f) () = $ac_func;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != $ac_func;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- eval "$as_ac_var=yes"
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-eval "$as_ac_var=no"
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: `eval echo '${'$as_ac_var'}'`" >&5
-echo "${ECHO_T}`eval echo '${'$as_ac_var'}'`" >&6
-if test `eval echo '${'$as_ac_var'}'` = yes; then
- cat >>confdefs.h <<_ACEOF
-#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-done
-
-echo "$as_me:$LINENO: checking for stat64" >&5
-echo $ECHO_N "checking for stat64... $ECHO_C" >&6
-if test "${ac_cv_func_stat64+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-/* System header to define __stub macros and hopefully few prototypes,
- which can conflict with char stat64 (); below.
- Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
- <limits.h> exists even on freestanding compilers. */
-#ifdef __STDC__
-# include <limits.h>
-#else
-# include <assert.h>
-#endif
-/* Override any gcc2 internal prototype to avoid an error. */
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-/* We use char because int might match the return type of a gcc2
- builtin and then its argument prototype would still apply. */
-char stat64 ();
-/* The GNU C library defines this for functions which it implements
- to always fail with ENOSYS. Some functions are actually named
- something starting with __ and the normal name is an alias. */
-#if defined (__stub_stat64) || defined (__stub___stat64)
-choke me
-#else
-char (*f) () = stat64;
-#endif
-#ifdef __cplusplus
-}
-#endif
-
-int
-main ()
-{
-return f != stat64;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_func_stat64=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_func_stat64=no
-fi
-rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_func_stat64" >&5
-echo "${ECHO_T}$ac_cv_func_stat64" >&6
-if test $ac_cv_func_stat64 = yes; then
- echo "$as_me:$LINENO: checking for struct stat64" >&5
-echo $ECHO_N "checking for struct stat64... $ECHO_C" >&6
- stat64_struct="no!"
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-#include <sys/types.h>
- #include <sys/stat.h>
- #include <unistd.h>
-int
-main ()
-{
-int main(void) { struct stat64 s1;}
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- cat >>confdefs.h <<\_ACEOF
-#define HAVE_STAT64 1
-_ACEOF
-
- stat64_struct="yes."
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- echo "$as_me:$LINENO: result: $stat64_struct" >&5
-echo "${ECHO_T}$stat64_struct" >&6
-
-fi
-
-echo "$as_me:$LINENO: checking for off_t" >&5
-echo $ECHO_N "checking for off_t... $ECHO_C" >&6
-if test "${ac_cv_type_off_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((off_t *) 0)
- return 0;
-if (sizeof (off_t))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_off_t=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_off_t=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_off_t" >&5
-echo "${ECHO_T}$ac_cv_type_off_t" >&6
-
-echo "$as_me:$LINENO: checking size of off_t" >&5
-echo $ECHO_N "checking size of off_t... $ECHO_C" >&6
-if test "${ac_cv_sizeof_off_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_off_t" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off_t))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off_t))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off_t))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_off_t=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (off_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (off_t)); }
-unsigned long ulongval () { return (long) (sizeof (off_t)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (off_t))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (off_t))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (off_t))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_off_t=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (off_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (off_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_off_t=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_off_t" >&5
-echo "${ECHO_T}$ac_cv_sizeof_off_t" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_OFF_T $ac_cv_sizeof_off_t
-_ACEOF
-
-
-echo "$as_me:$LINENO: checking for off64_t" >&5
-echo $ECHO_N "checking for off64_t... $ECHO_C" >&6
-if test "${ac_cv_type_off64_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((off64_t *) 0)
- return 0;
-if (sizeof (off64_t))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_off64_t=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_off64_t=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_off64_t" >&5
-echo "${ECHO_T}$ac_cv_type_off64_t" >&6
-
-echo "$as_me:$LINENO: checking size of off64_t" >&5
-echo $ECHO_N "checking size of off64_t... $ECHO_C" >&6
-if test "${ac_cv_sizeof_off64_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_off64_t" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off64_t))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off64_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off64_t))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off64_t))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (off64_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_off64_t=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (off64_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (off64_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (off64_t)); }
-unsigned long ulongval () { return (long) (sizeof (off64_t)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (off64_t))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (off64_t))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (off64_t))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_off64_t=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (off64_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (off64_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_off64_t=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_off64_t" >&5
-echo "${ECHO_T}$ac_cv_sizeof_off64_t" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_OFF64_T $ac_cv_sizeof_off64_t
-_ACEOF
-
-
-echo "$as_me:$LINENO: checking for fpos_t" >&5
-echo $ECHO_N "checking for fpos_t... $ECHO_C" >&6
-if test "${ac_cv_type_fpos_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-if ((fpos_t *) 0)
- return 0;
-if (sizeof (fpos_t))
- return 0;
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_type_fpos_t=yes
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_cv_type_fpos_t=no
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-echo "$as_me:$LINENO: result: $ac_cv_type_fpos_t" >&5
-echo "${ECHO_T}$ac_cv_type_fpos_t" >&6
-
-echo "$as_me:$LINENO: checking size of fpos_t" >&5
-echo $ECHO_N "checking size of fpos_t... $ECHO_C" >&6
-if test "${ac_cv_sizeof_fpos_t+set}" = set; then
- echo $ECHO_N "(cached) $ECHO_C" >&6
-else
- if test "$ac_cv_type_fpos_t" = yes; then
- # The cast to unsigned long works around a bug in the HP C Compiler
- # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
- # declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
- # This bug is HP SR number 8606223364.
- if test "$cross_compiling" = yes; then
- # Depending upon the size, compute the lo and hi bounds.
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (fpos_t))) >= 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=0 ac_mid=0
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (fpos_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr $ac_mid + 1`
- if test $ac_lo -le $ac_mid; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (fpos_t))) < 0)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=-1 ac_mid=-1
- while :; do
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (fpos_t))) >= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_lo=$ac_mid; break
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_hi=`expr '(' $ac_mid ')' - 1`
- if test $ac_mid -le $ac_hi; then
- ac_lo= ac_hi=
- break
- fi
- ac_mid=`expr 2 '*' $ac_mid`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
- done
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo= ac_hi=
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-# Binary search between lo and hi bounds.
-while test "x$ac_lo" != "x$ac_hi"; do
- ac_mid=`expr '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo`
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-int
-main ()
-{
-static int test_array [1 - 2 * !(((long) (sizeof (fpos_t))) <= $ac_mid)];
-test_array [0] = 0
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest.$ac_objext
-if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
- (eval $ac_compile) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } &&
- { ac_try='test -s conftest.$ac_objext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_hi=$ac_mid
-else
- echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-ac_lo=`expr '(' $ac_mid ')' + 1`
-fi
-rm -f conftest.$ac_objext conftest.$ac_ext
-done
-case $ac_lo in
-?*) ac_cv_sizeof_fpos_t=$ac_lo;;
-'') { { echo "$as_me:$LINENO: error: cannot compute sizeof (fpos_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (fpos_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; } ;;
-esac
-else
- if test "$cross_compiling" = yes; then
- { { echo "$as_me:$LINENO: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot run test program while cross compiling
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-else
- cat >conftest.$ac_ext <<_ACEOF
-#line $LINENO "configure"
-/* confdefs.h. */
-_ACEOF
-cat confdefs.h >>conftest.$ac_ext
-cat >>conftest.$ac_ext <<_ACEOF
-/* end confdefs.h. */
-$ac_includes_default
-long longval () { return (long) (sizeof (fpos_t)); }
-unsigned long ulongval () { return (long) (sizeof (fpos_t)); }
-#include <stdio.h>
-#include <stdlib.h>
-int
-main ()
-{
-
- FILE *f = fopen ("conftest.val", "w");
- if (! f)
- exit (1);
- if (((long) (sizeof (fpos_t))) < 0)
- {
- long i = longval ();
- if (i != ((long) (sizeof (fpos_t))))
- exit (1);
- fprintf (f, "%ld\n", i);
- }
- else
- {
- unsigned long i = ulongval ();
- if (i != ((long) (sizeof (fpos_t))))
- exit (1);
- fprintf (f, "%lu\n", i);
- }
- exit (ferror (f) || fclose (f) != 0);
-
- ;
- return 0;
-}
-_ACEOF
-rm -f conftest$ac_exeext
-if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
- (eval $ac_link) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
- { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- echo "$as_me:$LINENO: \$? = $ac_status" >&5
- (exit $ac_status); }; }; then
- ac_cv_sizeof_fpos_t=`cat conftest.val`
-else
- echo "$as_me: program exited with status $ac_status" >&5
-echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
-( exit $ac_status )
-{ { echo "$as_me:$LINENO: error: cannot compute sizeof (fpos_t), 77
-See \`config.log' for more details." >&5
-echo "$as_me: error: cannot compute sizeof (fpos_t), 77
-See \`config.log' for more details." >&2;}
- { (exit 1); exit 1; }; }
-fi
-rm -f core core.* *.core gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
-fi
-fi
-rm -f conftest.val
-else
- ac_cv_sizeof_fpos_t=0
-fi
-fi
-echo "$as_me:$LINENO: result: $ac_cv_sizeof_fpos_t" >&5
-echo "${ECHO_T}$ac_cv_sizeof_fpos_t" >&6
-cat >>confdefs.h <<_ACEOF
-#define SIZEOF_FPOS_T $ac_cv_sizeof_fpos_t
-_ACEOF
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-#################################################################################
-# Now we're into our "optional features"
-#################################################################################
-
-
-# --enable-ccmalloc - configure for compiling in ccmalloc memory checks
-#
-# The CC output variable is modified.
-# Requires ccmalloc version >= 0.4.0, because it uses --no-wrapper.
-#
-# Check whether --enable-ccmalloc or --disable-ccmalloc was given.
-if test "${enable_ccmalloc+set}" = set; then
- enableval="$enable_ccmalloc"
- case $enable_ccmalloc in
- yes) { echo "$as_me:$LINENO: enabled ccmalloc memory debugging" >&5
-echo "$as_me: enabled ccmalloc memory debugging" >&6;}
- CC="ccmalloc --no-wrapper $CC"
- ;;
- no) { echo "$as_me:$LINENO: ccmalloc memory debug/tracing disabled" >&5
-echo "$as_me: ccmalloc memory debug/tracing disabled" >&6;}
- ;;
- *) echo "Ignoring unknown argument to --enable-ccmalloc: $enable_ccmalloc"
- ;;
-esac
-fi;
-
-
-
-# --enable-debugging=x - set debugging level to <x> (1-3)
-#
-# At all levels, including 0, replaces CFLAGS w/ "-g -Wall" (so it assumes gcc).
-# Sets the DEBUGLEVEL preprocessor symbol to <x>
-#
-# Check whether --enable-debugging or --disable-debugging was given.
-if test "${enable_debugging+set}" = set; then
- enableval="$enable_debugging"
- case $enable_debugging in
- yes) { echo "$as_me:$LINENO: enabled debugging diagnostics level 0 (CFLAGS only, no verbosity)" >&5
-echo "$as_me: enabled debugging diagnostics level 0 (CFLAGS only, no verbosity)" >&6;}
- CFLAGS="-g -Wall"
- cat >>confdefs.h <<\_ACEOF
-#define DEBUGLEVEL 0
-_ACEOF
-
- ;;
- 1) { echo "$as_me:$LINENO: enabled debugging diagnostics level 1 (low verbosity)" >&5
-echo "$as_me: enabled debugging diagnostics level 1 (low verbosity)" >&6;}
- CFLAGS="-g -Wall"
- cat >>confdefs.h <<\_ACEOF
-#define DEBUGLEVEL 1
-_ACEOF
-
- ;;
- 2) { echo "$as_me:$LINENO: enabled debugging diagnostics level 2 (moderate verbosity)" >&5
-echo "$as_me: enabled debugging diagnostics level 2 (moderate verbosity)" >&6;}
- CFLAGS="-g -Wall"
- cat >>confdefs.h <<\_ACEOF
-#define DEBUGLEVEL 2
-_ACEOF
-
- ;;
- 3) { echo "$as_me:$LINENO: enabled debugging diagnostics level 3 (high verbosity)" >&5
-echo "$as_me: enabled debugging diagnostics level 3 (high verbosity)" >&6;}
- CFLAGS="-g -Wall"
- cat >>confdefs.h <<\_ACEOF
-#define DEBUGLEVEL 3
-_ACEOF
-
- ;;
- no) { echo "$as_me:$LINENO: debugging diagnostics disabled" >&5
-echo "$as_me: debugging diagnostics disabled" >&6;}
- cat >>confdefs.h <<\_ACEOF
-#define DEBUGLEVEL 0
-_ACEOF
-
- ;;
- *) echo "Ignoring unknown argument to --enable-debugging: $enable_debugging"
- ;;
-esac
-fi;
-
-
-
-# --enable-lfs Large File Summit (LFS) support for >2GB files
-# See: http://ftp.sas.com/standards/large.file/x_open.20Mar96.html
-#
-# Check whether --enable-lfs or --disable-lfs was given.
-if test "${enable_lfs+set}" = set; then
- enableval="$enable_lfs"
- case $enable_lfs in
- yes) { echo "$as_me:$LINENO: configured for optional LFS, large file support" >&5
-echo "$as_me: configured for optional LFS, large file support" >&6;}
- cat >>confdefs.h <<\_ACEOF
-#define _LARGEFILE_SOURCE 1
-_ACEOF
-
- cat >>confdefs.h <<\_ACEOF
-#define _LARGEFILE64_SOURCE 1
-_ACEOF
-
- cat >>confdefs.h <<\_ACEOF
-#define _FILE_OFFSET_BITS 64
-_ACEOF
-
- ;;
- no) ;;
- *) echo "Ignoring unknown argument to --enable-lfs: $enable_lfs"
- ;;
-esac
-fi;
-
-
-
-# --enable-pvm Enable Parallel Virtual Machine (PVM) support
-#
-# Sets PVMLIBDIR, PVMINCDIR, PCMPROGS, PVMLIBS output variables
-# Sets SRE_ENABLE_PVM preprocessor variable.
-#
-# Check whether --enable-pvm or --disable-pvm was given.
-if test "${enable_pvm+set}" = set; then
- enableval="$enable_pvm"
- case $enable_pvm in
- yes) { echo "$as_me:$LINENO: enabled optional PVM (Parallel Virtual Machine) support" >&5
-echo "$as_me: enabled optional PVM (Parallel Virtual Machine) support" >&6;}
- PVMLIBDIR="-L${PVM_ROOT}/lib/${PVM_ARCH}"
- PVMINCDIR="-I${PVM_ROOT}/include"
- PVMLIBS="-lpvm3"
- cat >>confdefs.h <<\_ACEOF
-#define SRE_ENABLE_PVM 1
-_ACEOF
-
- ;;
- no) { echo "$as_me:$LINENO: PVM (Parallel Virtual machine) support disabled" >&5
-echo "$as_me: PVM (Parallel Virtual machine) support disabled" >&6;}
- ;;
- *) echo "Ignoring unknown argument to --enable-pvm: $enable_pvm"
- ;;
-esac
-fi;
-
-
-
-
-
-# Write out squidconf.h header
- ac_config_headers="$ac_config_headers squidconf.h"
-
-
-# Write out Makefiles, and squid.h also has some output variable substitution.
- ac_config_files="$ac_config_files Makefile"
-
- ac_config_files="$ac_config_files Testsuite/Makefile"
-
- ac_config_files="$ac_config_files squid.h"
-
-
-cat >confcache <<\_ACEOF
-# This file is a shell script that caches the results of configure
-# tests run on this system so they can be shared between configure
-# scripts and configure runs, see configure's option --config-cache.
-# It is not useful on other systems. If it contains results you don't
-# want to keep, you may remove or edit it.
-#
-# config.status only pays attention to the cache file if you give it
-# the --recheck option to rerun configure.
-#
-# `ac_cv_env_foo' variables (set or unset) will be overridden when
-# loading this file, other *unset* `ac_cv_foo' will be assigned the
-# following values.
-
-_ACEOF
-
-# The following way of writing the cache mishandles newlines in values,
-# but we know of no workaround that is simple, portable, and efficient.
-# So, don't put newlines in cache variables' values.
-# Ultrix sh set writes to stderr and can't be redirected directly,
-# and sets the high bit in the cache file unless we assign to the vars.
-{
- (set) 2>&1 |
- case `(ac_space=' '; set | grep ac_space) 2>&1` in
- *ac_space=\ *)
- # `set' does not quote correctly, so add quotes (double-quote
- # substitution turns \\\\ into \\, and sed turns \\ into \).
- sed -n \
- "s/'/'\\\\''/g;
- s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
- ;;
- *)
- # `set' quotes correctly as required by POSIX, so do not add quotes.
- sed -n \
- "s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1=\\2/p"
- ;;
- esac;
-} |
- sed '
- t clear
- : clear
- s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/
- t end
- /^ac_cv_env/!s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
- : end' >>confcache
-if diff $cache_file confcache >/dev/null 2>&1; then :; else
- if test -w $cache_file; then
- test "x$cache_file" != "x/dev/null" && echo "updating cache $cache_file"
- cat confcache >$cache_file
- else
- echo "not updating unwritable cache $cache_file"
- fi
-fi
-rm -f confcache
-
-test "x$prefix" = xNONE && prefix=$ac_default_prefix
-# Let make expand exec_prefix.
-test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
-
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
-# trailing colons and then remove the whole line if VPATH becomes empty
-# (actually we leave an empty line to preserve line numbers).
-if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=/{
-s/:*\$(srcdir):*/:/;
-s/:*\${srcdir}:*/:/;
-s/:*@srcdir@:*/:/;
-s/^\([^=]*=[ ]*\):*/\1/;
-s/:*$//;
-s/^[^=]*=[ ]*$//;
-}'
-fi
-
-DEFS=-DHAVE_CONFIG_H
-
-ac_libobjs=
-ac_ltlibobjs=
-for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
- # 1. Remove the extension, and $U if already installed.
- ac_i=`echo "$ac_i" |
- sed 's/\$U\././;s/\.o$//;s/\.obj$//'`
- # 2. Add them.
- ac_libobjs="$ac_libobjs $ac_i\$U.$ac_objext"
- ac_ltlibobjs="$ac_ltlibobjs $ac_i"'$U.lo'
-done
-LIBOBJS=$ac_libobjs
-
-LTLIBOBJS=$ac_ltlibobjs
-
-
-
-: ${CONFIG_STATUS=./config.status}
-ac_clean_files_save=$ac_clean_files
-ac_clean_files="$ac_clean_files $CONFIG_STATUS"
-{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5
-echo "$as_me: creating $CONFIG_STATUS" >&6;}
-cat >$CONFIG_STATUS <<_ACEOF
-#! $SHELL
-# Generated by $as_me.
-# Run this file to recreate the current configuration.
-# Compiler output produced by configure, useful for debugging
-# configure, is in config.log if it exists.
-
-debug=false
-ac_cs_recheck=false
-ac_cs_silent=false
-SHELL=\${CONFIG_SHELL-$SHELL}
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-## --------------------- ##
-## M4sh Initialization. ##
-## --------------------- ##
-
-# Be Bourne compatible
-if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then
- emulate sh
- NULLCMD=:
- # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which
- # is contrary to our usage. Disable this feature.
- alias -g '${1+"$@"}'='"$@"'
-elif test -n "${BASH_VERSION+set}" && (set -o posix) >/dev/null 2>&1; then
- set -o posix
-fi
-
-# Support unset when possible.
-if (FOO=FOO; unset FOO) >/dev/null 2>&1; then
- as_unset=unset
-else
- as_unset=false
-fi
-
-
-# Work around bugs in pre-3.0 UWIN ksh.
-$as_unset ENV MAIL MAILPATH
-PS1='$ '
-PS2='> '
-PS4='+ '
-
-# NLS nuisances.
-for as_var in \
- LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \
- LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \
- LC_TELEPHONE LC_TIME
-do
- if (set +x; test -n "`(eval $as_var=C; export $as_var) 2>&1`"); then
- eval $as_var=C; export $as_var
- else
- $as_unset $as_var
- fi
-done
-
-# Required to use basename.
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-if (basename /) >/dev/null 2>&1 && test "X`basename / 2>&1`" = "X/"; then
- as_basename=basename
-else
- as_basename=false
-fi
-
-
-# Name of the executable.
-as_me=`$as_basename "$0" ||
-$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
- X"$0" : 'X\(//\)$' \| \
- X"$0" : 'X\(/\)$' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X/"$0" |
- sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/; q; }
- /^X\/\(\/\/\)$/{ s//\1/; q; }
- /^X\/\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
-
-
-# PATH needs CR, and LINENO needs CR and PATH.
-# Avoid depending upon Character Ranges.
-as_cr_letters='abcdefghijklmnopqrstuvwxyz'
-as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
-as_cr_Letters=$as_cr_letters$as_cr_LETTERS
-as_cr_digits='0123456789'
-as_cr_alnum=$as_cr_Letters$as_cr_digits
-
-# The user is always right.
-if test "${PATH_SEPARATOR+set}" != set; then
- echo "#! /bin/sh" >conf$$.sh
- echo "exit 0" >>conf$$.sh
- chmod +x conf$$.sh
- if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then
- PATH_SEPARATOR=';'
- else
- PATH_SEPARATOR=:
- fi
- rm -f conf$$.sh
-fi
-
-
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" || {
- # Find who we are. Look in the path if we contain no path at all
- # relative or not.
- case $0 in
- *[\\/]* ) as_myself=$0 ;;
- *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break
-done
-
- ;;
- esac
- # We did not find ourselves, most probably we were run as `sh COMMAND'
- # in which case we are not to be found in the path.
- if test "x$as_myself" = x; then
- as_myself=$0
- fi
- if test ! -f "$as_myself"; then
- { { echo "$as_me:$LINENO: error: cannot find myself; rerun with an absolute path" >&5
-echo "$as_me: error: cannot find myself; rerun with an absolute path" >&2;}
- { (exit 1); exit 1; }; }
- fi
- case $CONFIG_SHELL in
- '')
- as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
-do
- IFS=$as_save_IFS
- test -z "$as_dir" && as_dir=.
- for as_base in sh bash ksh sh5; do
- case $as_dir in
- /*)
- if ("$as_dir/$as_base" -c '
- as_lineno_1=$LINENO
- as_lineno_2=$LINENO
- as_lineno_3=`(expr $as_lineno_1 + 1) 2>/dev/null`
- test "x$as_lineno_1" != "x$as_lineno_2" &&
- test "x$as_lineno_3" = "x$as_lineno_2" ') 2>/dev/null; then
- $as_unset BASH_ENV || test "${BASH_ENV+set}" != set || { BASH_ENV=; export BASH_ENV; }
- $as_unset ENV || test "${ENV+set}" != set || { ENV=; export ENV; }
- CONFIG_SHELL=$as_dir/$as_base
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$0" ${1+"$@"}
- fi;;
- esac
- done
-done
-;;
- esac
-
- # Create $as_me.lineno as a copy of $as_myself, but with $LINENO
- # uniformly replaced by the line number. The first 'sed' inserts a
- # line-number line before each line; the second 'sed' does the real
- # work. The second script uses 'N' to pair each line-number line
- # with the numbered line, and appends trailing '-' during
- # substitution so that $LINENO is not a special case at line end.
- # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the
- # second 'sed' script. Blame Lee E. McMahon for sed's syntax. :-)
- sed '=' <$as_myself |
- sed '
- N
- s,$,-,
- : loop
- s,^\(['$as_cr_digits']*\)\(.*\)[$]LINENO\([^'$as_cr_alnum'_]\),\1\2\1\3,
- t loop
- s,-$,,
- s,^['$as_cr_digits']*\n,,
- ' >$as_me.lineno &&
- chmod +x $as_me.lineno ||
- { { echo "$as_me:$LINENO: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&5
-echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2;}
- { (exit 1); exit 1; }; }
-
- # Don't try to exec as it changes $[0], causing all sort of problems
- # (the dirname of $[0] is not the place where we might find the
- # original and so on. Autoconf is especially sensible to this).
- . ./$as_me.lineno
- # Exit status is that of the last command.
- exit
-}
-
-
-case `echo "testing\c"; echo 1,2,3`,`echo -n testing; echo 1,2,3` in
- *c*,-n*) ECHO_N= ECHO_C='
-' ECHO_T=' ' ;;
- *c*,* ) ECHO_N=-n ECHO_C= ECHO_T= ;;
- *) ECHO_N= ECHO_C='\c' ECHO_T= ;;
-esac
-
-if expr a : '\(a\)' >/dev/null 2>&1; then
- as_expr=expr
-else
- as_expr=false
-fi
-
-rm -f conf$$ conf$$.exe conf$$.file
-echo >conf$$.file
-if ln -s conf$$.file conf$$ 2>/dev/null; then
- # We could just check for DJGPP; but this test a) works b) is more generic
- # and c) will remain valid once DJGPP supports symlinks (DJGPP 2.04).
- if test -f conf$$.exe; then
- # Don't use ln at all; we don't have any links
- as_ln_s='cp -p'
- else
- as_ln_s='ln -s'
- fi
-elif ln conf$$.file conf$$ 2>/dev/null; then
- as_ln_s=ln
-else
- as_ln_s='cp -p'
-fi
-rm -f conf$$ conf$$.exe conf$$.file
-
-if mkdir -p . 2>/dev/null; then
- as_mkdir_p=:
-else
- as_mkdir_p=false
-fi
-
-as_executable_p="test -f"
-
-# Sed expression to map a string onto a valid CPP name.
-as_tr_cpp="sed y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g"
-
-# Sed expression to map a string onto a valid variable name.
-as_tr_sh="sed y%*+%pp%;s%[^_$as_cr_alnum]%_%g"
-
-
-# IFS
-# We need space, tab and new line, in precisely that order.
-as_nl='
-'
-IFS=" $as_nl"
-
-# CDPATH.
-$as_unset CDPATH
-
-exec 6>&1
-
-# Open the log real soon, to keep \$[0] and so on meaningful, and to
-# report actual input values of CONFIG_FILES etc. instead of their
-# values after options handling. Logging --version etc. is OK.
-exec 5>>config.log
-{
- echo
- sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
-## Running $as_me. ##
-_ASBOX
-} >&5
-cat >&5 <<_CSEOF
-
-This file was extended by SQUID $as_me 1.9g, which was
-generated by GNU Autoconf 2.57. Invocation command line was
-
- CONFIG_FILES = $CONFIG_FILES
- CONFIG_HEADERS = $CONFIG_HEADERS
- CONFIG_LINKS = $CONFIG_LINKS
- CONFIG_COMMANDS = $CONFIG_COMMANDS
- $ $0 $@
-
-_CSEOF
-echo "on `(hostname || uname -n) 2>/dev/null | sed 1q`" >&5
-echo >&5
-_ACEOF
-
-# Files that config.status was made for.
-if test -n "$ac_config_files"; then
- echo "config_files=\"$ac_config_files\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_headers"; then
- echo "config_headers=\"$ac_config_headers\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_links"; then
- echo "config_links=\"$ac_config_links\"" >>$CONFIG_STATUS
-fi
-
-if test -n "$ac_config_commands"; then
- echo "config_commands=\"$ac_config_commands\"" >>$CONFIG_STATUS
-fi
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-ac_cs_usage="\
-\`$as_me' instantiates files from templates according to the
-current configuration.
-
-Usage: $0 [OPTIONS] [FILE]...
-
- -h, --help print this help, then exit
- -V, --version print version number, then exit
- -q, --quiet do not print progress messages
- -d, --debug don't remove temporary files
- --recheck update $as_me by reconfiguring in the same conditions
- --file=FILE[:TEMPLATE]
- instantiate the configuration file FILE
- --header=FILE[:TEMPLATE]
- instantiate the configuration header FILE
-
-Configuration files:
-$config_files
-
-Configuration headers:
-$config_headers
-
-Report bugs to <bug-autoconf at gnu.org>."
-_ACEOF
-
-cat >>$CONFIG_STATUS <<_ACEOF
-ac_cs_version="\\
-SQUID config.status 1.9g
-configured by $0, generated by GNU Autoconf 2.57,
- with options \\"`echo "$ac_configure_args" | sed 's/[\\""\`\$]/\\\\&/g'`\\"
-
-Copyright 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001
-Free Software Foundation, Inc.
-This config.status script is free software; the Free Software Foundation
-gives unlimited permission to copy, distribute and modify it."
-srcdir=$srcdir
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-# If no file are specified by the user, then we need to provide default
-# value. By we need to know if files were specified by the user.
-ac_need_defaults=:
-while test $# != 0
-do
- case $1 in
- --*=*)
- ac_option=`expr "x$1" : 'x\([^=]*\)='`
- ac_optarg=`expr "x$1" : 'x[^=]*=\(.*\)'`
- ac_shift=:
- ;;
- -*)
- ac_option=$1
- ac_optarg=$2
- ac_shift=shift
- ;;
- *) # This is not an option, so the user has probably given explicit
- # arguments.
- ac_option=$1
- ac_need_defaults=false;;
- esac
-
- case $ac_option in
- # Handling of the options.
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
- -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
- ac_cs_recheck=: ;;
- --version | --vers* | -V )
- echo "$ac_cs_version"; exit 0 ;;
- --he | --h)
- # Conflict between --help and --header
- { { echo "$as_me:$LINENO: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: ambiguous option: $1
-Try \`$0 --help' for more information." >&2;}
- { (exit 1); exit 1; }; };;
- --help | --hel | -h )
- echo "$ac_cs_usage"; exit 0 ;;
- --debug | --d* | -d )
- debug=: ;;
- --file | --fil | --fi | --f )
- $ac_shift
- CONFIG_FILES="$CONFIG_FILES $ac_optarg"
- ac_need_defaults=false;;
- --header | --heade | --head | --hea )
- $ac_shift
- CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg"
- ac_need_defaults=false;;
- -q | -quiet | --quiet | --quie | --qui | --qu | --q \
- | -silent | --silent | --silen | --sile | --sil | --si | --s)
- ac_cs_silent=: ;;
-
- # This is an error.
- -*) { { echo "$as_me:$LINENO: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&5
-echo "$as_me: error: unrecognized option: $1
-Try \`$0 --help' for more information." >&2;}
- { (exit 1); exit 1; }; } ;;
-
- *) ac_config_targets="$ac_config_targets $1" ;;
-
- esac
- shift
-done
-
-ac_configure_extra_args=
-
-if $ac_cs_silent; then
- exec 6>/dev/null
- ac_configure_extra_args="$ac_configure_extra_args --silent"
-fi
-
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF
-if \$ac_cs_recheck; then
- echo "running $SHELL $0 " $ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6
- exec $SHELL $0 $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
-fi
-
-_ACEOF
-
-
-
-
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-for ac_config_target in $ac_config_targets
-do
- case "$ac_config_target" in
- # Handling of arguments.
- "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;;
- "Testsuite/Makefile" ) CONFIG_FILES="$CONFIG_FILES Testsuite/Makefile" ;;
- "squid.h" ) CONFIG_FILES="$CONFIG_FILES squid.h" ;;
- "squidconf.h" ) CONFIG_HEADERS="$CONFIG_HEADERS squidconf.h" ;;
- *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5
-echo "$as_me: error: invalid argument: $ac_config_target" >&2;}
- { (exit 1); exit 1; }; };;
- esac
-done
-
-# If the user did not use the arguments to specify the items to instantiate,
-# then the envvar interface is used. Set only those that are not.
-# We use the long form for the default assignment because of an extremely
-# bizarre bug on SunOS 4.1.3.
-if $ac_need_defaults; then
- test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files
- test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers
-fi
-
-# Have a temporary directory for convenience. Make it in the build tree
-# simply because there is no reason to put it here, and in addition,
-# creating and moving files from /tmp can sometimes cause problems.
-# Create a temporary directory, and hook for its removal unless debugging.
-$debug ||
-{
- trap 'exit_status=$?; rm -rf $tmp && exit $exit_status' 0
- trap '{ (exit 1); exit 1; }' 1 2 13 15
-}
-
-# Create a (secure) tmp directory for tmp files.
-
-{
- tmp=`(umask 077 && mktemp -d -q "./confstatXXXXXX") 2>/dev/null` &&
- test -n "$tmp" && test -d "$tmp"
-} ||
-{
- tmp=./confstat$$-$RANDOM
- (umask 077 && mkdir $tmp)
-} ||
-{
- echo "$me: cannot create a temporary directory in ." >&2
- { (exit 1); exit 1; }
-}
-
-_ACEOF
-
-cat >>$CONFIG_STATUS <<_ACEOF
-
-#
-# CONFIG_FILES section.
-#
-
-# No need to generate the scripts if there are no CONFIG_FILES.
-# This happens for instance when ./config.status config.h
-if test -n "\$CONFIG_FILES"; then
- # Protect against being on the right side of a sed subst in config.status.
- sed 's/,@/@@/; s/@,/@@/; s/,;t t\$/@;t t/; /@;t t\$/s/[\\\\&,]/\\\\&/g;
- s/@@/,@/; s/@@/@,/; s/@;t t\$/,;t t/' >\$tmp/subs.sed <<\\CEOF
-s,@SHELL@,$SHELL,;t t
-s,@PATH_SEPARATOR@,$PATH_SEPARATOR,;t t
-s,@PACKAGE_NAME@,$PACKAGE_NAME,;t t
-s,@PACKAGE_TARNAME@,$PACKAGE_TARNAME,;t t
-s,@PACKAGE_VERSION@,$PACKAGE_VERSION,;t t
-s,@PACKAGE_STRING@,$PACKAGE_STRING,;t t
-s,@PACKAGE_BUGREPORT@,$PACKAGE_BUGREPORT,;t t
-s,@exec_prefix@,$exec_prefix,;t t
-s,@prefix@,$prefix,;t t
-s,@program_transform_name@,$program_transform_name,;t t
-s,@bindir@,$bindir,;t t
-s,@sbindir@,$sbindir,;t t
-s,@libexecdir@,$libexecdir,;t t
-s,@datadir@,$datadir,;t t
-s,@sysconfdir@,$sysconfdir,;t t
-s,@sharedstatedir@,$sharedstatedir,;t t
-s,@localstatedir@,$localstatedir,;t t
-s,@libdir@,$libdir,;t t
-s,@includedir@,$includedir,;t t
-s,@oldincludedir@,$oldincludedir,;t t
-s,@infodir@,$infodir,;t t
-s,@mandir@,$mandir,;t t
-s,@build_alias@,$build_alias,;t t
-s,@host_alias@,$host_alias,;t t
-s,@target_alias@,$target_alias,;t t
-s,@DEFS@,$DEFS,;t t
-s,@ECHO_C@,$ECHO_C,;t t
-s,@ECHO_N@,$ECHO_N,;t t
-s,@ECHO_T@,$ECHO_T,;t t
-s,@LIBS@,$LIBS,;t t
-s,@SQUID_RELCODE@,$SQUID_RELCODE,;t t
-s,@SQUID_DATE@,$SQUID_DATE,;t t
-s,@SQUID_COPYRIGHT@,$SQUID_COPYRIGHT,;t t
-s,@SQUID_LICENSE@,$SQUID_LICENSE,;t t
-s,@SQUID_LICENSETAG@,$SQUID_LICENSETAG,;t t
-s,@SQUID_VERSION@,$SQUID_VERSION,;t t
-s,@CC@,$CC,;t t
-s,@CFLAGS@,$CFLAGS,;t t
-s,@LDFLAGS@,$LDFLAGS,;t t
-s,@CPPFLAGS@,$CPPFLAGS,;t t
-s,@ac_ct_CC@,$ac_ct_CC,;t t
-s,@EXEEXT@,$EXEEXT,;t t
-s,@OBJEXT@,$OBJEXT,;t t
-s,@RANLIB@,$RANLIB,;t t
-s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t
-s,@AR@,$AR,;t t
-s,@EXEC_DEPENDENCY@,$EXEC_DEPENDENCY,;t t
-s,@CPP@,$CPP,;t t
-s,@EGREP@,$EGREP,;t t
-s,@SQD_UINT16@,$SQD_UINT16,;t t
-s,@SQD_UINT32@,$SQD_UINT32,;t t
-s,@SQD_UINT64@,$SQD_UINT64,;t t
-s,@PVMLIBDIR@,$PVMLIBDIR,;t t
-s,@PVMINCDIR@,$PVMINCDIR,;t t
-s,@PVMLIBS@,$PVMLIBS,;t t
-s,@LIBOBJS@,$LIBOBJS,;t t
-s,@LTLIBOBJS@,$LTLIBOBJS,;t t
-CEOF
-
-_ACEOF
-
- cat >>$CONFIG_STATUS <<\_ACEOF
- # Split the substitutions into bite-sized pieces for seds with
- # small command number limits, like on Digital OSF/1 and HP-UX.
- ac_max_sed_lines=48
- ac_sed_frag=1 # Number of current file.
- ac_beg=1 # First line for current file.
- ac_end=$ac_max_sed_lines # Line after last line for current file.
- ac_more_lines=:
- ac_sed_cmds=
- while $ac_more_lines; do
- if test $ac_beg -gt 1; then
- sed "1,${ac_beg}d; ${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
- else
- sed "${ac_end}q" $tmp/subs.sed >$tmp/subs.frag
- fi
- if test ! -s $tmp/subs.frag; then
- ac_more_lines=false
- else
- # The purpose of the label and of the branching condition is to
- # speed up the sed processing (if there are no `@' at all, there
- # is no need to browse any of the substitutions).
- # These are the two extra sed commands mentioned above.
- (echo ':t
- /@[a-zA-Z_][a-zA-Z_0-9]*@/!b' && cat $tmp/subs.frag) >$tmp/subs-$ac_sed_frag.sed
- if test -z "$ac_sed_cmds"; then
- ac_sed_cmds="sed -f $tmp/subs-$ac_sed_frag.sed"
- else
- ac_sed_cmds="$ac_sed_cmds | sed -f $tmp/subs-$ac_sed_frag.sed"
- fi
- ac_sed_frag=`expr $ac_sed_frag + 1`
- ac_beg=$ac_end
- ac_end=`expr $ac_end + $ac_max_sed_lines`
- fi
- done
- if test -z "$ac_sed_cmds"; then
- ac_sed_cmds=cat
- fi
-fi # test -n "$CONFIG_FILES"
-
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-for ac_file in : $CONFIG_FILES; do test "x$ac_file" = x: && continue
- # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
- case $ac_file in
- - | *:- | *:-:* ) # input from stdin
- cat >$tmp/stdin
- ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- * ) ac_file_in=$ac_file.in ;;
- esac
-
- # Compute @srcdir@, @top_srcdir@, and @INSTALL@ for subdirectories.
- ac_dir=`(dirname "$ac_file") 2>/dev/null ||
-$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_file" : 'X\(//\)[^/]' \| \
- X"$ac_file" : 'X\(//\)$' \| \
- X"$ac_file" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$ac_file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p "$ac_dir"
- else
- as_dir="$ac_dir"
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
- { (exit 1); exit 1; }; }; }
-
- ac_builddir=.
-
-if test "$ac_dir" != .; then
- ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'`
- # A "../" for each directory in $ac_dir_suffix.
- ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'`
-else
- ac_dir_suffix= ac_top_builddir=
-fi
-
-case $srcdir in
- .) # No --srcdir option. We are building in place.
- ac_srcdir=.
- if test -z "$ac_top_builddir"; then
- ac_top_srcdir=.
- else
- ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'`
- fi ;;
- [\\/]* | ?:[\\/]* ) # Absolute path.
- ac_srcdir=$srcdir$ac_dir_suffix;
- ac_top_srcdir=$srcdir ;;
- *) # Relative path.
- ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix
- ac_top_srcdir=$ac_top_builddir$srcdir ;;
-esac
-# Don't blindly perform a `cd "$ac_dir"/$ac_foo && pwd` since $ac_foo can be
-# absolute.
-ac_abs_builddir=`cd "$ac_dir" && cd $ac_builddir && pwd`
-ac_abs_top_builddir=`cd "$ac_dir" && cd ${ac_top_builddir}. && pwd`
-ac_abs_srcdir=`cd "$ac_dir" && cd $ac_srcdir && pwd`
-ac_abs_top_srcdir=`cd "$ac_dir" && cd $ac_top_srcdir && pwd`
-
-
-
- if test x"$ac_file" != x-; then
- { echo "$as_me:$LINENO: creating $ac_file" >&5
-echo "$as_me: creating $ac_file" >&6;}
- rm -f "$ac_file"
- fi
- # Let's still pretend it is `configure' which instantiates (i.e., don't
- # use $as_me), people would be surprised to read:
- # /* config.h. Generated by config.status. */
- if test x"$ac_file" = x-; then
- configure_input=
- else
- configure_input="$ac_file. "
- fi
- configure_input=$configure_input"Generated from `echo $ac_file_in |
- sed 's,.*/,,'` by configure."
-
- # First look for the input files in the build tree, otherwise in the
- # src tree.
- ac_file_inputs=`IFS=:
- for f in $ac_file_in; do
- case $f in
- -) echo $tmp/stdin ;;
- [\\/$]*)
- # Absolute (can't be DOS-style, as IFS=:)
- test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- echo $f;;
- *) # Relative
- if test -f "$f"; then
- # Build tree
- echo $f
- elif test -f "$srcdir/$f"; then
- # Source tree
- echo $srcdir/$f
- else
- # /dev/null tree
- { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- fi;;
- esac
- done` || { (exit 1); exit 1; }
-_ACEOF
-cat >>$CONFIG_STATUS <<_ACEOF
- sed "$ac_vpsub
-$extrasub
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-:t
-/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
-s,@configure_input@,$configure_input,;t t
-s,@srcdir@,$ac_srcdir,;t t
-s,@abs_srcdir@,$ac_abs_srcdir,;t t
-s,@top_srcdir@,$ac_top_srcdir,;t t
-s,@abs_top_srcdir@,$ac_abs_top_srcdir,;t t
-s,@builddir@,$ac_builddir,;t t
-s,@abs_builddir@,$ac_abs_builddir,;t t
-s,@top_builddir@,$ac_top_builddir,;t t
-s,@abs_top_builddir@,$ac_abs_top_builddir,;t t
-" $ac_file_inputs | (eval "$ac_sed_cmds") >$tmp/out
- rm -f $tmp/stdin
- if test x"$ac_file" != x-; then
- mv $tmp/out $ac_file
- else
- cat $tmp/out
- rm -f $tmp/out
- fi
-
-done
-_ACEOF
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-#
-# CONFIG_HEADER section.
-#
-
-# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where
-# NAME is the cpp macro being defined and VALUE is the value it is being given.
-#
-# ac_d sets the value in "#define NAME VALUE" lines.
-ac_dA='s,^\([ ]*\)#\([ ]*define[ ][ ]*\)'
-ac_dB='[ ].*$,\1#\2'
-ac_dC=' '
-ac_dD=',;t'
-# ac_u turns "#undef NAME" without trailing blanks into "#define NAME VALUE".
-ac_uA='s,^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)'
-ac_uB='$,\1#\2define\3'
-ac_uC=' '
-ac_uD=',;t'
-
-for ac_file in : $CONFIG_HEADERS; do test "x$ac_file" = x: && continue
- # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in".
- case $ac_file in
- - | *:- | *:-:* ) # input from stdin
- cat >$tmp/stdin
- ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- *:* ) ac_file_in=`echo "$ac_file" | sed 's,[^:]*:,,'`
- ac_file=`echo "$ac_file" | sed 's,:.*,,'` ;;
- * ) ac_file_in=$ac_file.in ;;
- esac
-
- test x"$ac_file" != x- && { echo "$as_me:$LINENO: creating $ac_file" >&5
-echo "$as_me: creating $ac_file" >&6;}
-
- # First look for the input files in the build tree, otherwise in the
- # src tree.
- ac_file_inputs=`IFS=:
- for f in $ac_file_in; do
- case $f in
- -) echo $tmp/stdin ;;
- [\\/$]*)
- # Absolute (can't be DOS-style, as IFS=:)
- test -f "$f" || { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- echo $f;;
- *) # Relative
- if test -f "$f"; then
- # Build tree
- echo $f
- elif test -f "$srcdir/$f"; then
- # Source tree
- echo $srcdir/$f
- else
- # /dev/null tree
- { { echo "$as_me:$LINENO: error: cannot find input file: $f" >&5
-echo "$as_me: error: cannot find input file: $f" >&2;}
- { (exit 1); exit 1; }; }
- fi;;
- esac
- done` || { (exit 1); exit 1; }
- # Remove the trailing spaces.
- sed 's/[ ]*$//' $ac_file_inputs >$tmp/in
-
-_ACEOF
-
-# Transform confdefs.h into two sed scripts, `conftest.defines' and
-# `conftest.undefs', that substitutes the proper values into
-# config.h.in to produce config.h. The first handles `#define'
-# templates, and the second `#undef' templates.
-# And first: Protect against being on the right side of a sed subst in
-# config.status. Protect against being in an unquoted here document
-# in config.status.
-rm -f conftest.defines conftest.undefs
-# Using a here document instead of a string reduces the quoting nightmare.
-# Putting comments in sed scripts is not portable.
-#
-# `end' is used to avoid that the second main sed command (meant for
-# 0-ary CPP macros) applies to n-ary macro definitions.
-# See the Autoconf documentation for `clear'.
-cat >confdef2sed.sed <<\_ACEOF
-s/[\\&,]/\\&/g
-s,[\\$`],\\&,g
-t clear
-: clear
-s,^[ ]*#[ ]*define[ ][ ]*\([^ (][^ (]*\)\(([^)]*)\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1\2${ac_dC}\3${ac_dD},gp
-t end
-s,^[ ]*#[ ]*define[ ][ ]*\([^ ][^ ]*\)[ ]*\(.*\)$,${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD},gp
-: end
-_ACEOF
-# If some macros were called several times there might be several times
-# the same #defines, which is useless. Nevertheless, we may not want to
-# sort them, since we want the *last* AC-DEFINE to be honored.
-uniq confdefs.h | sed -n -f confdef2sed.sed >conftest.defines
-sed 's/ac_d/ac_u/g' conftest.defines >conftest.undefs
-rm -f confdef2sed.sed
-
-# This sed command replaces #undef with comments. This is necessary, for
-# example, in the case of _POSIX_SOURCE, which is predefined and required
-# on some systems where configure will not decide to define it.
-cat >>conftest.undefs <<\_ACEOF
-s,^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*,/* & */,
-_ACEOF
-
-# Break up conftest.defines because some shells have a limit on the size
-# of here documents, and old seds have small limits too (100 cmds).
-echo ' # Handle all the #define templates only if necessary.' >>$CONFIG_STATUS
-echo ' if grep "^[ ]*#[ ]*define" $tmp/in >/dev/null; then' >>$CONFIG_STATUS
-echo ' # If there are no defines, we may have an empty if/fi' >>$CONFIG_STATUS
-echo ' :' >>$CONFIG_STATUS
-rm -f conftest.tail
-while grep . conftest.defines >/dev/null
-do
- # Write a limited-size here document to $tmp/defines.sed.
- echo ' cat >$tmp/defines.sed <<CEOF' >>$CONFIG_STATUS
- # Speed up: don't consider the non `#define' lines.
- echo '/^[ ]*#[ ]*define/!b' >>$CONFIG_STATUS
- # Work around the forget-to-reset-the-flag bug.
- echo 't clr' >>$CONFIG_STATUS
- echo ': clr' >>$CONFIG_STATUS
- sed ${ac_max_here_lines}q conftest.defines >>$CONFIG_STATUS
- echo 'CEOF
- sed -f $tmp/defines.sed $tmp/in >$tmp/out
- rm -f $tmp/in
- mv $tmp/out $tmp/in
-' >>$CONFIG_STATUS
- sed 1,${ac_max_here_lines}d conftest.defines >conftest.tail
- rm -f conftest.defines
- mv conftest.tail conftest.defines
-done
-rm -f conftest.defines
-echo ' fi # grep' >>$CONFIG_STATUS
-echo >>$CONFIG_STATUS
-
-# Break up conftest.undefs because some shells have a limit on the size
-# of here documents, and old seds have small limits too (100 cmds).
-echo ' # Handle all the #undef templates' >>$CONFIG_STATUS
-rm -f conftest.tail
-while grep . conftest.undefs >/dev/null
-do
- # Write a limited-size here document to $tmp/undefs.sed.
- echo ' cat >$tmp/undefs.sed <<CEOF' >>$CONFIG_STATUS
- # Speed up: don't consider the non `#undef'
- echo '/^[ ]*#[ ]*undef/!b' >>$CONFIG_STATUS
- # Work around the forget-to-reset-the-flag bug.
- echo 't clr' >>$CONFIG_STATUS
- echo ': clr' >>$CONFIG_STATUS
- sed ${ac_max_here_lines}q conftest.undefs >>$CONFIG_STATUS
- echo 'CEOF
- sed -f $tmp/undefs.sed $tmp/in >$tmp/out
- rm -f $tmp/in
- mv $tmp/out $tmp/in
-' >>$CONFIG_STATUS
- sed 1,${ac_max_here_lines}d conftest.undefs >conftest.tail
- rm -f conftest.undefs
- mv conftest.tail conftest.undefs
-done
-rm -f conftest.undefs
-
-cat >>$CONFIG_STATUS <<\_ACEOF
- # Let's still pretend it is `configure' which instantiates (i.e., don't
- # use $as_me), people would be surprised to read:
- # /* config.h. Generated by config.status. */
- if test x"$ac_file" = x-; then
- echo "/* Generated by configure. */" >$tmp/config.h
- else
- echo "/* $ac_file. Generated by configure. */" >$tmp/config.h
- fi
- cat $tmp/in >>$tmp/config.h
- rm -f $tmp/in
- if test x"$ac_file" != x-; then
- if diff $ac_file $tmp/config.h >/dev/null 2>&1; then
- { echo "$as_me:$LINENO: $ac_file is unchanged" >&5
-echo "$as_me: $ac_file is unchanged" >&6;}
- else
- ac_dir=`(dirname "$ac_file") 2>/dev/null ||
-$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$ac_file" : 'X\(//\)[^/]' \| \
- X"$ac_file" : 'X\(//\)$' \| \
- X"$ac_file" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$ac_file" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- { if $as_mkdir_p; then
- mkdir -p "$ac_dir"
- else
- as_dir="$ac_dir"
- as_dirs=
- while test ! -d "$as_dir"; do
- as_dirs="$as_dir $as_dirs"
- as_dir=`(dirname "$as_dir") 2>/dev/null ||
-$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
- X"$as_dir" : 'X\(//\)[^/]' \| \
- X"$as_dir" : 'X\(//\)$' \| \
- X"$as_dir" : 'X\(/\)' \| \
- . : '\(.\)' 2>/dev/null ||
-echo X"$as_dir" |
- sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; }
- /^X\(\/\/\)[^/].*/{ s//\1/; q; }
- /^X\(\/\/\)$/{ s//\1/; q; }
- /^X\(\/\).*/{ s//\1/; q; }
- s/.*/./; q'`
- done
- test ! -n "$as_dirs" || mkdir $as_dirs
- fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5
-echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;}
- { (exit 1); exit 1; }; }; }
-
- rm -f $ac_file
- mv $tmp/config.h $ac_file
- fi
- else
- cat $tmp/config.h
- rm -f $tmp/config.h
- fi
-done
-_ACEOF
-
-cat >>$CONFIG_STATUS <<\_ACEOF
-
-{ (exit 0); exit 0; }
-_ACEOF
-chmod +x $CONFIG_STATUS
-ac_clean_files=$ac_clean_files_save
-
-
-# configure is writing to config.log, and then calls config.status.
-# config.status does its own redirection, appending to config.log.
-# Unfortunately, on DOS this fails, as config.log is still kept open
-# by configure, so config.status won't be able to write to it; its
-# output is simply discarded. So we exec the FD to /dev/null,
-# effectively closing config.log, so it can be properly (re)opened and
-# appended to by config.status. When coming back to configure, we
-# need to make the FD available again.
-if test "$no_create" != yes; then
- ac_cs_success=:
- ac_config_status_args=
- test "$silent" = yes &&
- ac_config_status_args="$ac_config_status_args --quiet"
- exec 5>/dev/null
- $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
- exec 5>>config.log
- # Use ||, not &&, to avoid exiting from the if with $? = 1, which
- # would make configure fail if this is the last instruction.
- $ac_cs_success || { (exit 1); exit 1; }
-fi
-
-
-
diff --git a/squid/dayhoff.c b/squid/dayhoff.c
deleted file mode 100644
index 1e37a58..0000000
--- a/squid/dayhoff.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* dayhoff.c
- *
- * Routines for dealing with PAM matrices.
- *
- * Includes:
- * ParsePAMFile() -- read a PAM matrix from disk.
- *
- *
- * SRE - Fri Apr 2 11:23:45 1993
- * CVS $Id: dayhoff.c,v 1.7 2003/05/26 16:21:50 eddy Exp $
- */
-
-
-#include "squidconf.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include <ctype.h>
-#include "squid.h"
-
-/* Function: ParsePAMFile()
- *
- * Purpose: Given a pointer to an open file containing a PAM matrix,
- * parse the file and allocate and fill a 2D array of
- * floats containing the matrix. The PAM file is
- * assumed to be in the format that NCBI distributes
- * with BLAST. BLOSUM matrices also work fine, as
- * produced by Henikoff's program "MATBLAS".
- *
- * Parses both old format and new format BLAST matrices.
- * Old format just had rows of integers.
- * New format includes a leading character on each row.
- *
- * The PAM matrix is a 27x27 matrix, 0=A..25=Z,26=*.
- * Note that it's not a 20x20 matrix as you might expect;
- * this is for speed of indexing as well as the ability
- * to deal with ambiguous characters.
- *
- * Args: fp - open PAM file
- * ret_pam - RETURN: pam matrix, integers
- * ret_scale - RETURN: scale factor for converting
- * to real Sij. For instance, PAM120 is
- * given in units of ln(2)/2. This may
- * be passed as NULL if the caller
- * doesn't care.
- *
- * Returns: 1 on success; 0 on failure and sets squid_errno to
- * indicate the cause. ret_pam is allocated here and
- * must be freed by the caller (use FreePAM).
- */
-int
-ParsePAMFile(FILE *fp, int ***ret_pam, float *ret_scale)
-{
- int **pam;
- char buffer[512]; /* input buffer from fp */
- int order[27]; /* order of fields, obtained from header */
- int nsymbols; /* total number of symbols in matrix */
- char *sptr;
- int idx;
- int row, col;
- float scale;
- int gotscale = FALSE;
-
- scale = 0.0; /* just to silence gcc uninit warnings */
- if (fp == NULL) { squid_errno = SQERR_NODATA; return 0; }
-
- /* Look at the first non-blank, non-comment line in the file.
- * It gives single-letter codes in the order the PAM matrix
- * is arrayed in the file.
- */
- do {
- if (fgets(buffer, 512, fp) == NULL)
- { squid_errno = SQERR_NODATA; return 0; }
-
- /* Get the scale factor from the header.
- * For BLOSUM files, we assume the line looks like:
- * BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
- * and we assume that the fraction is always 1/x;
- *
- * For PAM files, we assume the line looks like:
- * PAM 120 substitution matrix, scale = ln(2)/2 = 0.346574
- * and we assume that the number following the final '=' is our scale
- */
- if (strstr(buffer, "BLOSUM Clustered Scoring Matrix") != NULL &&
- (sptr = strchr(buffer, '/')) != NULL)
- {
- sptr++;
- if (! isdigit((int) (*sptr))) { squid_errno = SQERR_FORMAT; return 0; }
- scale = (float) ( (log(2.0)) / (atof(sptr)));
- gotscale = TRUE;
- }
- else if (strstr(buffer, "substitution matrix,") != NULL)
- {
- while ((sptr = strrchr(buffer, '=')) != NULL) {
- sptr += 2;
- if (IsReal(sptr)) {
- scale = atof(sptr);
- gotscale = TRUE;
- break;
- }
- }
- }
- } while ((sptr = strtok(buffer, " \t\n")) == NULL || *sptr == '#');
-
- idx = 0;
- do {
- order[idx] = (int) *sptr - (int) 'A';
- if (order[idx] < 0 || order[idx] > 25) order[idx] = 26;
- idx++;
- } while ((sptr = strtok(NULL, " \t\n")) != NULL);
- nsymbols = idx;
-
- /* Allocate a pam matrix. For speed of indexing, we use
- * a 27x27 matrix so we can do lookups using the ASCII codes
- * of amino acid single-letter representations, plus one
- * extra field to deal with the "*" (terminators).
- */
- if ((pam = (int **) calloc (27, sizeof(int *))) == NULL)
- Die("calloc failed");
- for (idx = 0; idx < 27; idx++)
- if ((pam[idx] = (int *) calloc (27, sizeof(int))) == NULL)
- Die("calloc failed");
-
- /* Parse the rest of the file.
- */
- for (row = 0; row < nsymbols; row++)
- {
- if (fgets(buffer, 512, fp) == NULL)
- { squid_errno = SQERR_NODATA; return 0; }
-
- if ((sptr = strtok(buffer, " \t\n")) == NULL)
- { squid_errno = SQERR_NODATA; return 0; }
- for (col = 0; col < nsymbols; col++)
- {
- if (sptr == NULL) { squid_errno = SQERR_NODATA; return 0; }
-
- /* Watch out for new BLAST format, with leading characters
- */
- if (*sptr == '*' || isalpha((int) *sptr))
- col--; /* hack hack */
- else
- pam [order[row]] [order[col]] = atoi(sptr);
-
- sptr = strtok(NULL, " \t\n");
- }
- }
-
- /* Return
- */
- if (ret_scale != NULL)
- {
- if (gotscale) *ret_scale = scale;
- else
- {
- Warn("Failed to parse PAM matrix scale factor. Defaulting to ln(2)/2!");
- *ret_scale = log(2.0) / 2.0;
- }
- }
- *ret_pam = pam;
- return 1;
-}
diff --git a/squid/eps.c b/squid/eps.c
deleted file mode 100644
index cfc7494..0000000
--- a/squid/eps.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* eps.c
- * SRE, Thu Jun 21 18:02:31 2001 [St. Louis]
- *
- * Some crude support for Encapsulated PostScript (EPS) output,
- * DSC compliant.
- *
- * CVS $Id: eps.c,v 1.5 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "squid.h"
-#include "msa.h"
-
-/* Function: EPSWriteSmallMSA()
- * Date: SRE, Thu Jun 21 18:15:21 2001 [St. Louis]
- *
- * Purpose: Write an alignment in singleblock, Stockholm/SELEX like
- * format to an open file. Very crude.
- * Currently fails if the alignment is >50 columns long, because
- * it doesn't think it will fit on a single page.
- *
- * Args: fp - open file for writing
- * msa - alignment to write
- *
- * Returns: (void)
- */
-void
-EPSWriteSmallMSA(FILE *fp, MSA *msa)
-{
- int namewidth; /* namewidth in PostScript units */
- int fontwidth; /* width of a character in this font */
- int hspace; /* horizontal space between aligned chars */
- int vspace; /* vertical space between sequences */
- char *font; /* font name, e.g. "Courier" */
- int fontsize; /* font size in pts */
- int i,j; /* counter over sequences, columns */
- int len; /* tmp var holding length of something */
- int width, height; /* width and height of bounding box */
- int xpos, ypos; /* x,y position */
-
- /* Set some font characteristics; done here, so it'll
- * be easy to change. Magic numbers for Courier 12 determined
- * by trial and error.
- */
- fontwidth = 8;
- hspace = 9;
- vspace = 15;
- font = sre_strdup("Courier", -1);
- fontsize = 12;
-
- /* Find the width of the longest sequence name in characters.
- */
- namewidth = 0;
- for (i = 0; i < msa->nseq; i++)
- if ((len = (int) strlen(msa->sqname[i])) > namewidth)
- namewidth = len;
- namewidth += 1; /* add a space to separate name & aligned seq */
- namewidth *= fontwidth;
-
- /* Determine bounding box
- */
- if (msa->alen > 50) Die("No EPS fmt if alignment is >50 columns");
- width = namewidth + hspace*msa->alen;
- if (width > 612) Die("Alignment too wide to write in EPS");
- height = vspace*msa->nseq;
- if (height > 792) Die("Too many seqs to write in EPS");
-
- /* Magic EPS header, bare-bones DSC-compliant.
- */
- fprintf(fp, "%%!PS-Adobe-3.0 EPSF-3.0\n");
- fprintf(fp, "%%%%BoundingBox: %d %d %d %d\n", 0, 0, width, height);
- fprintf(fp, "%%%%Pages: 1\n");
- fprintf(fp, "%%%%EndComments\n");
-
- /* More postscript magic before we start the alignment
- */
- fprintf(fp, "/%s findfont\n", font);
- fprintf(fp, "%d scalefont\n", fontsize);
- fprintf(fp, "setfont\n");
- fprintf(fp, "newpath\n");
-
- /* Write the alignment in PostScript in a single block
- */
- for (i = 0; i < msa->nseq; i++)
- {
- ypos = (msa->nseq-i-1)*vspace;
- /* name first */
- fprintf(fp, "%d %d moveto\n", 0, ypos);
- fprintf(fp, "(%s) show\n", msa->sqname[i]);
- /* now seq */
- xpos = namewidth;
- for (j = 0; j < msa->alen; j++)
- {
- fprintf(fp, "%d %d moveto\n", xpos, ypos);
- fprintf(fp, "(%c) show\n", msa->aseq[i][j]);
- xpos+= hspace;
- }
- }
-
- free(font);
-}
-
-
diff --git a/squid/file.c b/squid/file.c
deleted file mode 100644
index 90f929d..0000000
--- a/squid/file.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-
-/* file.c
- * SRE, Wed Jun 19 11:19:22 1996
- *
- * File operation utilities, dealing with pathnames, directories,
- * and environment variables.
- *
- * The goal is to have these be platform-independent but they
- * currently are UNIX-specific: i.e. this file is currently POSIX compliant
- * but it is NOT ANSI C compliant. (The sole offender is getenv().)
- *
- * CVS $Id: file.c,v 1.9 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "squid.h"
-#include "sqfuncs.h"
-
-/*
- * VMS: #define DIRSLASH ']'
- * MacOS: #define DIRSLASH ':'
- * DOS: #define DIRSLASH '\\'
- *
- * The code assumes that '.' is used for file name extensions,
- * such as "foo.bar".
- */
-#define DIRSLASH '/' /* UNIX directory paths have /foo/bar */
-
-
-
-/* Function: FileDirname()
- *
- * Purpose: Returns the path from a filename:
- * "/foo/bar/baz" -> "/foo/bar"
- * "foo/bar" -> "foo"
- * "foo" -> "."
- * "/" -> "/"
- * i.e. the string will be non-NULL; it will
- * contain the string up to but not including the
- * last '/' character; returns "." if
- * there are no '/' characters, and returns "/"
- * if the last slash is the first character.
- * Modeled on Tcl's "file dirname" command.
- *
- * Args: file - name of file "/foo/bar/baz".
- *
- * Return: ptr to malloc'ed string "/foo/bar".
- */
-char *
-FileDirname(char *file)
-{
- char *dirname;
- char *lastslash;
- int len;
-
- lastslash = strrchr(file, DIRSLASH);
- len = (lastslash == NULL) ? 0 : (int) (lastslash - file);
- dirname = (char *) MallocOrDie (sizeof(char) * (len+2));
- if (len > 0) strncpy(dirname, file, len);
- else if (*file != DIRSLASH) { *dirname = '.'; len = 1; }
- else { *dirname = DIRSLASH; len = 1; }
- dirname[len] = '\0';
- return dirname;
-}
-
-
-/* Function: FileTail()
- *
- * Purpose: Return everything after the DIRSLASH:
- * "/foo/bar/baz.1" -> "baz.1"
- * "foo/bar" -> "bar"
- * "foo" -> "foo"
- * "/" -> ""
- * If noextension is TRUE, removes a trailing ".foo" extension
- * too.
- *
- * Args: file - name of file "/foo/bar/baz.1"
- * noextension - TRUE to also remove extensions
- *
- * Return: ptr to malloc'ed string "baz.1"
- */
-char *
-FileTail(char *file, int noextension)
-{
- char *tail;
- char *lastslash;
- char *lastdot;
- /* remove directory prefix */
- lastslash = strrchr(file, DIRSLASH);
- tail = (char *) MallocOrDie (sizeof(char) * (strlen(file)+1));
- if (lastslash == NULL) strcpy(tail, file);
- else strcpy(tail, lastslash+1);
- /* remove trailing suffix */
- if (noextension) {
- if ((lastdot = strrchr(tail, '.')) != NULL)
- *lastdot = '\0';
- }
-
- return tail;
-}
-
-
-/* Function: FileSameDirectory()
- * Date: SRE, Wed Mar 6 20:03:23 2002 [St. Louis]
- *
- * Purpose: Given a path to one file, and the
- * name of another file in the same directory,
- * concat the path from file1 onto file2, and
- * return the result. Caller must free the ptr
- * that's returned.
- *
- * Written for SSI - SSI indices contain filenames
- * without paths, and we will need to convert that
- * to a full path.
- *
- * Args: file1 - a path to a file, e.g. "/foo/bar/baz.1"
- * file2 - a simple filename, e.g. "quux.2"
- *
- * Returns: path to file2: e.g. "/foo/bar/quux.2"
- * Returns NULL if file2 already has a path, and the result
- * would be a different place.
- */
-char *
-FileSameDirectory(char *file1, char *file2)
-{
- char *path;
- char *tail;
- char *result;
- int seems_ok = 1;
-
- path = FileDirname(file1);
- tail = FileTail(file2, FALSE);
- if (strcmp(file2, tail) != 0) seems_ok = 0; /* ut-oh, file2 *had* a path */
- result = FileConcat(path, tail);
- if (! seems_ok && strcmp(result, file2) != 0) {
- free(result); result = NULL;
- }
- free(path);
- free(tail);
- return result;
-}
-
-/* Function: FileConcat()
- *
- * Purpose: Concatenate a directory path and a file name,
- * returning a pointer to a malloc'ed string with the
- * full filename. This isn't just a string concat,
- * because we're careful about the dir slash.
- */
-char *
-FileConcat(char *dir, char *file)
-{
- char *full;
-
- full = (char *) MallocOrDie (sizeof(char) * (strlen(dir)+strlen(file)+2));
- if (*file == DIRSLASH) strcpy(full, file); /* file = "/foo", ignore directory. */
- else sprintf(full, "%s%c%s", dir, DIRSLASH, file);
- return full;
-}
-
-
-/* Function: FileAddSuffix()
- * Date: SRE, Wed Aug 1 11:19:33 2001 [Pasadena]
- *
- * Purpose: Add a suffix to a filename, return a malloc'ed
- * string containing the new filename.sfx name.
- * Example:
- * FileAddSuffix("genbank", "ssi")
- * returns "genbank.ssi".
- */
-char *
-FileAddSuffix(char *filename, char *sfx)
-{
- char *new;
- new = MallocOrDie(strlen(filename) + strlen(sfx) + 2);
- sprintf(new, "%s.%s", filename, sfx);
- return new;
-}
-
-/* Function: EnvFileOpen()
- * Date: Sun Feb 12 10:55:29 1995
- *
- * Purpose: Open a file, given a file name and an environment
- * variable that contains a directory path. Files
- * are opened read-only. Does not look at current directory
- * unless "." is explicitly in the path specified by env.
- *
- * For instance:
- * fp = EnvFileOpen("BLOSUM45", "BLASTMAT", NULL);
- * or:
- * fp = EnvFileOpen("swiss", "BLASTDB", NULL);
- *
- * Environment variables may contain a colon-delimited
- * list of more than one path; e.g.
- * setenv BLASTDB /nfs/databases/foo:/nfs/databases/bar
- *
- * Sometimes a group of files may be found in
- * one directory; for instance, an index file with a
- * database. The caller can EnvFileOpen() the main
- * file, and ask to get the name of the
- * directory back in ret_dir, so it can construct
- * the other auxiliary file names and fopen() them. (If it called
- * EnvFileOpen(), it might get confused by
- * file name clashes and open files in different
- * directories.
- *
- * Args: fname - name of file to open
- * env - name of environment variable containing path
- * ret_dir - if non-NULL, RETURN: name of dir that was used.
- *
- * Return: FILE * to open file, or NULL on failure -- same as fopen()
- * Caller must free ret_dir if it passed a non-NULL address.
- */
-FILE *
-EnvFileOpen(char *fname, char *env, char **ret_dir)
-{
- FILE *fp;
- char *path;
- char *s; /* ptr to indiv element in env list */
- char full[1024]; /* constructed file name */
-
- if (env == NULL) return NULL;
- if ((path = Strdup(getenv(env))) == NULL) return NULL;
-
- fp = NULL;
- s = strtok(path, ":");
- while (s != NULL)
- {
- if (((int) strlen(fname) + (int) strlen(s) + 2) > 1024)
- { free(path); return NULL; }
- sprintf(full, "%s%c%s", s, DIRSLASH, fname);
- if ((fp = fopen(full, "r")) != NULL) break;
- s = strtok(NULL, ":");
- }
-
- /* Return the path we used, if caller wants it
- */
- if (ret_dir != NULL) *ret_dir = Strdup(s);
- free(path);
-
- return fp;
-}
-
-
-/* Function: FileExists()
- *
- * Purpose: Return TRUE if filename exists.
- * Testing fopen() is the only possible platform-independent test
- * I'm aware of.
- */
-int
-FileExists(char *filename)
-{
- FILE *fp;
- if ((fp = fopen(filename, "r"))) { fclose(fp); return TRUE; }
- return FALSE;
-}
-
-
diff --git a/squid/getopt.c b/squid/getopt.c
deleted file mode 100644
index 194834b..0000000
--- a/squid/getopt.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* CVS $Id: getopt.c,v 1.8 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "squid.h"
-
-/* Function: Getopt()
- *
- * Purpose: Portable command line option parsing with abbreviated
- * option switches. Replaces UNIX getopt(). Using UNIX getopt()
- * hinders portability to non-UNIX platforms, and getopt()
- * is also limited to single letter options.
- *
- * Getopt() implements a superset of UNIX getopt().
- * All of getopt()'s single-character switch behavior
- * is emulated, and "--" by itself terminates the options.
- * Additionally, Getopt() provides extended switches
- * like "--youroptionhere", and Getopt() type checks
- * arguments.
- *
- * Extended options must start with "--", as in "--option1".
- * Normal options must start with "-", as in "-o".
- * Normal options may be concatenated, as in "-a -b" == "-ab".
- *
- * See bottom of this .c file after #ifdef GETOPT_TESTDRIVER
- * for an example of calling Getopt().
- *
- * Args: argc - from main(). number of elems in argv.
- * argv - from main(). argv[0] is the name of the command.
- * opt - array of opt_s structures, defining option switches
- * nopts - number of switches in opt
- * usage - a (possibly long) string to print if usage error.
- * ret_optind - RETURN: the index in argv[] of the next
- * valid command-line token.
- * ret_optname- RETURN: ptr to the name of option switch
- * seen, or NULL if no option was seen.
- * ret_optarg - RETURN: ptr to the optional argument, if any;
- * NULL if option takes no argument.
- *
- * Return: 1 if a valid option was parsed.
- * 0 if no option was found, and command-line parsing is complete.
- * Die()'s here if an error is detected.
- */
-int
-Getopt(int argc, char **argv, struct opt_s *opt, int nopts, char *usage,
-       int *ret_optind, char **ret_optname, char **ret_optarg)
-{
-  int i;
-  int arglen;
-  int nmatch;
-  int exact;                    /* TRUE once a full-length name match is seen */
-  /* Parse position is kept in statics across calls, so this function is
-   * not reentrant and walks a single argv per process.
-   */
-  static int optind = 1;        /* init to 1 on first call */
-  static char *optptr = NULL;   /* ptr to next valid switch */
-  int opti = 0;                 /* init only to silence gcc uninit warnings */
-
-  /* Check to see if we've run out of options.
-   * A '-' by itself is an argument (e.g. "read from stdin")
-   * not an option.
-   */
-  if (optind >= argc || argv[optind][0] != '-' || strcmp(argv[optind], "-") == 0)
-    {
-      *ret_optind  = optind;
-      *ret_optarg  = NULL;
-      *ret_optname = NULL;
-      return 0;
-    }
-
-  /* Check to see if we're being told that this is the end
-   * of the options with the special "--" flag.
-   */
-  if (strcmp(argv[optind], "--") == 0)
-    {
-      optind++;
-      *ret_optind  = optind;
-      *ret_optname = NULL;
-      *ret_optarg  = NULL;
-      return 0;
-    }
-
-  /* We have a real option. Find which one it is.
-   * We handle single letter switches "-o" separately
-   * from full switches "--option", based on the "-" vs. "--"
-   * prefix -- single letter switches can be concatenated
-   * as long as they don't have arguments.
-   */
-                                /* full option */
-  if (optptr == NULL && strncmp(argv[optind], "--", 2) == 0)
-    {
-      /* Use optptr to parse argument in options of form "--foo=666".
-       * This overwrites the '=' in argv[optind] with a NUL.
-       */
-      if ((optptr = strchr(argv[optind], '=')) != NULL)
-        { *optptr = '\0'; optptr++; }
-
-      arglen = strlen(argv[optind]);
-      nmatch = 0;
-      exact  = FALSE;
-      for (i = 0; i < nopts; i++)
-        if (opt[i].single == FALSE &&
-            strncmp(opt[i].name, argv[optind], arglen) == 0)
-          {
-            nmatch++;
-            opti = i;
-            if (arglen == (int) strlen(opt[i].name))
-              { exact = TRUE; break; } /* exact match, stop now */
-          }
-      /* Ambiguity is decided from the recorded exact-match flag. When the
-       * loop runs to completion i equals nopts, so opt[i] must not be
-       * consulted here.
-       */
-      if (nmatch > 1 && ! exact)
-        Die("Option \"%s\" is ambiguous; please be more specific.\n%s",
-            argv[optind], usage);
-      if (nmatch == 0)
-        Die("No such option \"%s\".\n%s", argv[optind], usage);
-
-      *ret_optname = opt[opti].name;
-
-      /* Set the argument, if there is one
-       */
-      if (opt[opti].argtype != sqdARG_NONE)
-        {
-          if (optptr != NULL)
-            {                   /* --foo=666 style */
-              *ret_optarg = optptr;
-              optptr = NULL;
-              optind++;
-            }
-          else if (optind+1 >= argc)
-            Die("Option %s requires an argument\n%s", opt[opti].name, usage);
-          else                  /* "--foo 666" style */
-            {
-              *ret_optarg = argv[optind+1];
-              optind+=2;
-            }
-        }
-      else                      /* sqdARG_NONE */
-        {
-          if (optptr != NULL)
-            Die("Option %s does not take an argument\n%s", opt[opti].name, usage);
-          *ret_optarg = NULL;
-          optind++;
-        }
-    }
-  else                          /* else, a single letter option "-o" */
-    {
-                                /* find the option */
-      if (optptr == NULL)
-        optptr = argv[optind]+1;
-      for (opti = -1, i = 0; i < nopts; i++)
-        if (opt[i].single == TRUE && *optptr == opt[i].name[1])
-          { opti = i; break; }
-      if (opti == -1)
-        Die("No such option \"%c\".\n%s", *optptr, usage);
-      *ret_optname = opt[opti].name;
-
-                                /* set the argument, if there is one */
-      if (opt[opti].argtype != sqdARG_NONE)
-        {
-          if (*(optptr+1) != '\0') /* attached argument */
-            {
-              *ret_optarg = optptr+1;
-              optind++;
-            }
-          else if (optind+1 < argc) /* unattached argument */
-            {
-              *ret_optarg = argv[optind+1];
-              optind+=2;
-            }
-          else Die("Option %s requires an argument\n%s", opt[opti].name, usage);
-
-          optptr = NULL;        /* can't concatenate after an argument */
-        }
-      else                      /* sqdARG_NONE */
-        {
-          *ret_optarg = NULL;
-          if (*(optptr+1) != '\0') /* concatenation */
-            optptr++;
-          else
-            {
-              optind++;         /* move to next field */
-              optptr = NULL;
-            }
-        }
-
-    }
-
-  /* Type check the argument, if there is one
-   */
-  if (opt[opti].argtype != sqdARG_NONE)
-    {
-      if (opt[opti].argtype == sqdARG_INT && ! IsInt(*ret_optarg))
-        Die("Option %s requires an integer argument\n%s",
-            opt[opti].name, usage);
-      else if (opt[opti].argtype == sqdARG_FLOAT && ! IsReal(*ret_optarg))
-        Die("Option %s requires a numerical argument\n%s",
-            opt[opti].name, usage);
-      else if (opt[opti].argtype == sqdARG_CHAR && strlen(*ret_optarg) != 1)
-        Die("Option %s requires a single-character argument\n%s",
-            opt[opti].name, usage);
-      /* sqdARG_STRING is always ok, no type check necessary */
-    }
-
-  *ret_optind = optind;
-  return 1;
-}
-
-
-
-#ifdef GETOPT_TESTDRIVER
-/* cc -DGETOPT_TESTDRIVER -L ~/lib/squid.linux/ getopt.c -lsquid
- *
- * Stand-alone driver: echoes every option Getopt() recognizes, then
- * every remaining command-line argument.
- */
-struct opt_s OPTIONS[] = {
-  { "--test1", FALSE, sqdARG_INT    },
-  { "--test2", FALSE, sqdARG_FLOAT  },
-  { "--test3", FALSE, sqdARG_STRING },
-  { "--test4", FALSE, sqdARG_CHAR   },
-  { "-a",      TRUE,  sqdARG_NONE   },
-  { "-b",      TRUE,  sqdARG_INT    },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-int
-main(int argc, char **argv)
-{
-  int   optind;
-  char *optarg;
-  char *optname;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, "Usage/help here",
-                &optind, &optname, &optarg))
-    {
-      /* Getopt() sets optarg to NULL for sqdARG_NONE switches such as -a;
-       * a NULL must not be handed to %s.
-       */
-      printf("Option: index: %d name: %s argument: %s\n",
-             optind, optname, (optarg != NULL) ? optarg : "(none)");
-    }
-  while (optind < argc)
-    {
-      printf("Argument: index: %d name: %s\n", optind, argv[optind]);
-      optind++;
-    }
-
-  return 0;
-}
-
-
-#endif /*GETOPT_TESTDRIVER*/
diff --git a/squid/gki.c b/squid/gki.c
deleted file mode 100644
index 87e4d15..0000000
--- a/squid/gki.c
+++ /dev/null
@@ -1,390 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* gki.c
- * SRE, Sat May 1 14:49:08 1999
- *
- * "generic key index" module: emulation of Perl hashes.
- * Maps keys (ASCII char strings) to array index. Dynamically
- * resizes the hash table.
- *
- * Limitations:
- * - hash table can only grow; no provision for deleting keys
- * or downsizing the hash table.
- * - Maximum hash table size set at 100003. Performance
- * will degrade for key sets much larger than this.
- * - Assumes that integers are 32 bits (or greater).
- *
- * Defines a typedef'd structure (see gki.h):
- * GKI - a key index hash table.
- * Provides functions:
- * GKIInit() - start a hash table.
- * GKIStoreKey() - store a new key, get a unique index.
- * GKIKeyIndex() - retrieve an existing key's index.
- * GKIFree() - free a hash table.
- * GKIStatus() - Debugging: prints internal status of a hash struct
- *
- *
- * Note that there are no dependencies on squid; the gki.c/gki.h
- * pair are base ANSI C and can be reused anywhere.
- *****************************************************************
- *
- * API for storing/reading stuff:
- * moral equivalent of Perl's $foo{$key} = whatever, $bar{$key} = whatever:
- * #include "gki.h"
- *
- * GKI *hash;
- * int idx;
- * char *key;
- *
- * hash = GKIInit();
- * (Storing:)
- * (foreach key) {
- * idx = GKIStoreKey(hash, key);
- * (reallocate foo, bar as needed)
- * foo[idx] = whatever;
- * bar[idx] = whatever;
- * }
- * (Reading:)
- * (foreach key) {
- * idx = GKIKeyIndex(hash, key);
- * if (idx == -1) {no_such_key; }
- * (do something with) foo[idx];
- * (do something with) bar[idx];
- * }
- * GKIFree(hash);
- *
- *****************************************************************
- *
- * Timings on wrasse for 45402 keys in /usr/dict/words using
- * Tests/test_gki:
- * 250 msec store (6 usec/store)
- * 140 msec retrieve (3 usec/retrieve)
- * and using the 13408 names of Pfam's GP120.full alignment:
- * 70 msec store (5 usec/store)
- * 50 msec retrieve (4 usec/retrieve)
- *
- * CVS $Id: gki.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-#include "squid.h"
-#include "gki.h"
-
-/*
- * Best hash table sizes are prime numbers (see Knuth vol 3, Sorting
- * and Searching).
- * gki_primes[] defines the ascending order of hash table sizes
- * that we use in upsizing the hash table dynamically.
- * useful site for testing primes:
- * http://www.idbsu.edu/people/jbrennan/algebra/numbers/sieve.html
- * Because of the way gki_hashvalue works, the largest number
- * must be < INT_MAX / 128 / 128 : 131072 on a 32 bit machine.
- */
-static int gki_primes[] = { 101, 1009, 10007, 100003 }; /* table sizes, indexed by primelevel */
-#define GKI_NPRIMES 4 /* number of entries in gki_primes[]; keep in sync by hand */
-#define GKI_ALPHABETSIZE 128 /* radix used by gki_hashvalue() when folding key characters */
-
-static GKI *gki_alloc(int primelevel); /* allocate an empty table of size gki_primes[primelevel] */
-static int gki_hashvalue(GKI *hash, char *key); /* map key to a slot in hash->table */
-static int gki_upsize(GKI *old); /* rehash into the next larger table; 0 if already at the largest */
-
-
-/* Function: GKIInit()
- * Date:     SRE, Sat May 1 11:12:24 1999 [May Day geek-out]
- *
- * Purpose:  Create an empty key index hash table at the smallest
- *           size (primelevel 0 of gki_primes[]).
- *
- * Args:     (void)
- *
- * Returns:  Whatever gki_alloc(0) returns: a newly allocated table.
- *           Caller frees with GKIFree().
- */
-GKI *
-GKIInit(void)
-{
-  return gki_alloc(0);
-}
-
-/* Function: GKIFree()
- * Date:     SRE, Sat May 1 11:13:26 1999 [May Day geek-out]
- *
- * Purpose:  Release a key index hash table: every chained element,
- *           every key string, the slot array, and the GKI itself.
- *
- * Args:     hash - the table to destroy; NULL is accepted and ignored
- *
- * Returns:  (void). hash is invalid afterwards.
- */
-void
-GKIFree(GKI *hash)
-{
-  struct gki_elem *cur;
-  struct gki_elem *next;
-  int              slot;
-
-  if (hash == NULL) return;     /* tolerate a NULL */
-
-  for (slot = 0; slot < hash->nhash; slot++)
-    for (cur = hash->table[slot]; cur != NULL; cur = next)
-      {
-        next = cur->nxt;        /* save the link before freeing the node */
-        if (cur->key != NULL) free(cur->key); /* elements without a key are skipped */
-        free(cur);
-      }
-  free(hash->table);
-  free(hash);
-}
-
-/* Function: GKIStoreKey()
- * Date:     SRE, Sat May 1 11:16:48 1999 [May Day geek-out]
- *
- * Purpose:  Insert a copy of <key> into the table and assign it the
- *           next unused key index, counting from 0. The index is
- *           what lets callers map hashed keys onto ordinary C arrays.
- *
- *           No duplicate check is made: storing the same key twice
- *           yields two entries with different indices. Call
- *           GKIKeyIndex() first if the key may already be present.
- *
- *           After the insert, the table is grown with gki_upsize()
- *           once it holds more than three keys per slot, unless it
- *           is already at the largest size.
- *
- * Args:     hash - GKI structure to store the key in
- *           key  - NUL-terminated string to store (copied)
- *
- * Returns:  the new key's index, i.e. the value of hash->nkeys before
- *           the insert. This function has no error return of its own;
- *           allocation goes through MallocOrDie() (presumably fatal
- *           on failure -- confirm against squid.h).
- *           hash table is modified.
- */
-int
-GKIStoreKey(GKI *hash, char *key)
-{
-  struct gki_elem *elem;
-  int              slot;
-  int              idx;
-
-  slot = gki_hashvalue(hash, key);
-
-  elem      = MallocOrDie(sizeof(struct gki_elem));
-  elem->key = MallocOrDie(sizeof(char) * (strlen(key)+1));
-  strcpy(elem->key, key);
-
-  idx       = hash->nkeys;
-  elem->idx = idx;
-  elem->nxt = hash->table[slot]; /* push onto the front of the chain */
-  hash->table[slot] = elem;
-
-  hash->nkeys++;
-                                /* time to upsize? */
-  if (hash->nkeys > 3*hash->nhash && hash->primelevel < GKI_NPRIMES-1)
-    gki_upsize(hash);
-
-  return idx;
-}
-
-/* Function: GKIKeyIndex()
- * Date:     SRE, Sat May 1 11:20:42 1999 [May Day geek-out]
- *
- * Purpose:  Find <key> in the table and report the index it was
- *           given when stored.
- *
- * Args:     hash - the GKI hash table to search in
- *           key  - the key to look up
- *
- * Returns:  the idx of the first element in the key's chain whose
- *           string compares equal to <key>; -1 if there is none.
- *           hash table is unchanged.
- */
-int
-GKIKeyIndex(GKI *hash, char *key)
-{
-  struct gki_elem *elem;
-
-  elem = hash->table[gki_hashvalue(hash, key)];
-  while (elem != NULL)
-    {
-      if (strcmp(key, elem->key) == 0) return elem->idx;
-      elem = elem->nxt;
-    }
-  return -1;
-}
-
-/* Function: GKIStatus()
- * Date:     SRE, Sat May 1 11:11:13 1999 [St. Louis]
- *
- * Purpose:  (DEBUGGING) Walk every slot of the table, count the
- *           chain lengths, and print a short occupancy report.
- *
- * Args:     hash - the GKI hash table to look at
- *
- * Returns:  (void)
- *           Prints six lines of diagnostics on stdout.
- *           hash table is unchanged.
- */
-void
-GKIStatus(GKI *hash)
-{
-  struct gki_elem *elem;
-  int slot;
-  int chainlen;                 /* number of elements in current slot */
-  int nempty   = 0;
-  int longest  = -1;
-  int shortest = INT_MAX;
-
-  for (slot = 0; slot < hash->nhash; slot++)
-    {
-      chainlen = 0;
-      elem     = hash->table[slot];
-      while (elem != NULL) { chainlen++; elem = elem->nxt; }
-
-      if (chainlen == 0)       nempty++;
-      if (chainlen > longest)  longest  = chainlen;
-      if (chainlen < shortest) shortest = chainlen;
-    }
-
-  printf("Total keys: %d\n", hash->nkeys);
-  printf("Hash table size: %d\n", hash->nhash);
-  printf("Average occupancy: %.1f\n", (float) hash->nkeys / (float) hash->nhash);
-  printf("Unoccupied slots: %d\n", nempty);
-  printf("Most in one slot: %d\n", longest);
-  printf("Least in one slot: %d\n", shortest);
-}
-
-
-/* Function: gki_alloc()
- * Date:     SRE, Sat May 1 11:55:47 1999 [May Day geek-out]
- *
- * Purpose:  Allocate an empty hash table structure with the
- *           size given by primelevel.
- *
- * Args:     primelevel - level 0..GKI_NPRIMES-1, specifying
- *                        the size of the table; see gki_primes[]
- *                        array. Any other value is reported via Die().
- *
- * Returns:  An allocated hash table structure with nkeys == 0 and
- *           every slot set to NULL.
- *           Caller frees with GKIFree().
- */
-static GKI *
-gki_alloc(int primelevel)
-{
-  GKI *hash;
-  int  i;
-
-  if (primelevel < 0 || primelevel >= GKI_NPRIMES)
-    Die("bad primelevel in gki_alloc()");
-  hash = MallocOrDie(sizeof(GKI));
-
-  hash->primelevel = primelevel;
-  hash->nhash      = gki_primes[hash->primelevel];
-  /* table is an array of chain-head POINTERS, so size each slot as a
-   * pointer, not as a whole struct gki_elem.
-   */
-  hash->table      = MallocOrDie(sizeof(struct gki_elem *) * hash->nhash);
-  for (i = 0; i < hash->nhash; i++)
-    hash->table[i] = NULL;
-  hash->nkeys = 0;
-  return hash;
-}
-
-
-/* Function: gki_hashvalue()
- * Date:     SRE, Sat May 1 11:14:10 1999 [May Day geek-out]
- *
- * Purpose:  Calculate the hash value for a key. Usually
- *           we expect a one-word key, but the function will
- *           hash any string. The hash function
- *           is a simple one (see p. 233 of Sedgewick,
- *           Algorithms in C).
- *           Slightly optimized: does two characters at a time
- *           before doing the modulo; this gives us a significant
- *           speedup.
- *
- *           Characters are read as unsigned char so that bytes
- *           >= 0x80 cannot drive the accumulator negative where
- *           plain char is signed; a negative result would index
- *           before the start of hash->table. Results for 7-bit
- *           ASCII keys are unaffected by this.
- *
- * Args:     hash - the gki structure (we need to know the hash table size)
- *           key  - a string to calculate the hash value for
- *
- * Returns:  a hash value, in the range 0..hash->nhash-1.
- *           hash table is unmodified.
- */
-static int
-gki_hashvalue(GKI *hash, char *key)
-{
-  int val = 0;
-
-  for (; *key != '\0'; key++)
-    {
-      val = GKI_ALPHABETSIZE*val + (unsigned char) *key;
-      /* odd-length key: reduce what we have and stop */
-      if (*(++key) == '\0') { val = val % hash->nhash; break; }
-      val = (GKI_ALPHABETSIZE*val + (unsigned char) *key) % hash->nhash;
-    }
-  return val;
-}
-
-/* Function: gki_upsize()
- * Date:     SRE, Sat May 1 11:46:07 1999 [May Day geek-out]
- *
- * Purpose:  Move every element of the table into a table of the
- *           next size in gki_primes[], then make <old> refer to
- *           the larger table. Elements are relinked, not copied,
- *           so key strings and key indices are untouched.
- *
- * Args:     old - the GKI hash table to grow in place.
- *
- * Returns:  1 on success (the hash table is changed);
- *           0 if the table is already at its maximum size, in
- *           which case it is left unchanged.
- */
-static int
-gki_upsize(GKI *old)
-{
-  GKI             *bigger;
-  struct gki_elem *elem;
-  struct gki_elem *rest;        /* remainder of the old chain */
-  int              slot;
-  int              dest;
-
-  if (old->primelevel >= GKI_NPRIMES-1) return 0;
-  bigger = gki_alloc(old->primelevel+1);
-
-  /* Pop each element off its old chain and push it onto the front of
-   * its new chain. Pushing at the front means elements that stay
-   * together end up in reversed order.
-   */
-  for (slot = 0; slot < old->nhash; slot++)
-    for (elem = old->table[slot]; elem != NULL; elem = rest)
-      {
-        rest = elem->nxt;
-        dest = gki_hashvalue(bigger, elem->key);
-
-        elem->nxt           = bigger->table[dest];
-        bigger->table[dest] = elem;
-      }
-  free(old->table);
-
-  /* Adopt the new table's fields into the caller's structure and
-   * discard the temporary shell. nkeys is the same in both, so it
-   * is not copied.
-   */
-  old->primelevel = bigger->primelevel;
-  old->nhash      = bigger->nhash;
-  old->table      = bigger->table;
-  free(bigger);
-  return 1;
-}
diff --git a/squid/gki.h b/squid/gki.h
deleted file mode 100644
index e589a26..0000000
--- a/squid/gki.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQUID_GKI_INCLUDED
-#define SQUID_GKI_INCLUDED
-
-/* gki.h
- * SRE, Sat May 1 15:07:22 1999
- *
- * Declarations of structures, functions for generic key index
- * module: emulation of Perl hashes. See gki.c.
- *
- * RCS $Id: gki.h,v 1.2 1999/07/15 22:30:45 eddy Exp $
- */
-
-/* gki_elem:
- * key, array index pairs are kept in linked list structures.
- */
-struct gki_elem {
- char *key; /* heap copy of the key string; freed by GKIFree() */
- int idx; /* index assigned to this key when it was stored */
- struct gki_elem *nxt; /* next element in the same slot's chain, or NULL */
-};
-
-/* gki:
- * a dynamically resized hash structure;
- * contains a hash table and associated data
- */
-typedef struct {
- struct gki_elem **table; /* nhash chain heads; NULL marks an empty slot */
-
- int primelevel; /* which entry of gki.c's size table is in use */
- int nhash; /* number of slots in table */
- int nkeys; /* number of keys stored so far; also the next index to hand out */
-} GKI;
-
-GKI *GKIInit(void); /* create an empty table */
-void GKIFree(GKI *hash); /* destroy a table; NULL is tolerated */
-int GKIHashValue(GKI *hash, char *key); /* NOTE(review): gki.c defines only a static gki_hashvalue(); no definition of this name is visible -- confirm before calling */
-int GKIStoreKey(GKI *hash, char *key); /* store a key, return its new index */
-int GKIKeyIndex(GKI *hash, char *key); /* look up a key's index, -1 if absent */
-void GKIStatus(GKI *hash); /* debugging: print occupancy statistics */
-
-#endif /* SQUID_GKI_INCLUDED */
diff --git a/squid/gsi.c b/squid/gsi.c
deleted file mode 100644
index 886b1ee..0000000
--- a/squid/gsi.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* gsi.c
- * Interfaces for GSI "generic sequence index" files.
- * broken away from sqio.c and extended: SRE, Wed Aug 5 10:32:53 1998
- *
- *
- * GSI definition:
- * 1 + <nfiles> + <nkeys> total records.
- * Each record = 38 bytes.
- *
- * one header record : <"GSI" (32)> <nfiles (2)> <nkeys (4)>
- * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (4)>
- * <nkeys> key records : <key (32)> <fileno (2)> <offset(4)>
- *
- * Matches up with my Perl scripts that create GSI files.
- *
- * CVS $Id: gsi.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef SEEK_SET
-#include <unistd.h> /* needed for poor crippled SunOS */
-#endif
-
-#include "squid.h"
-#include "gsi.h"
-
-
-/*****************************************************************
- * GSI index file access routines
- *****************************************************************/
-
-/* Function: GSIOpen()
- *
- * Purpose:  Open a GSI index file and read its header record:
- *           the "GSI" magic key, the number of indexed files,
- *           and the number of key records.
- *
- * Args:     gsifile - name of the GSI file to open
- *
- * Return:   An allocated GSIFILE with gsifp, nfiles and recnum filled
- *           in (counts converted to host byte order), or NULL on
- *           failure with squid_errno set to SQERR_NOFILE (cannot open),
- *           SQERR_NODATA (header truncated) or SQERR_FORMAT (bad magic).
- *           Release the result with GSIClose().
- */
-GSIFILE *
-GSIOpen(char *gsifile)
-{
-  GSIFILE *gsi;
-  char     magic[GSI_KEYSIZE];
-
-  gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE));
-  /* GSI files are binary; "rb" keeps text-mode translation out of the way */
-  if ((gsi->gsifp = fopen(gsifile, "rb")) == NULL)
-    { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }
-
-  /* Require the whole key field: a short read would leave part of
-   * magic[] uninitialized.
-   */
-  if (fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp) != GSI_KEYSIZE)
-    { squid_errno = SQERR_NODATA; goto FAILURE; }
-  /* "GSI" plus its terminating NUL: same test as strcmp(magic, "GSI") */
-  if (memcmp(magic, "GSI", 4) != 0)
-    { squid_errno = SQERR_FORMAT; goto FAILURE; }
-
-  if (fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp) != 1)
-    { squid_errno = SQERR_NODATA; goto FAILURE; }
-  if (fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp) != 1)
-    { squid_errno = SQERR_NODATA; goto FAILURE; }
-
-  gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */
-  gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long */
-
-  return gsi;
-
- FAILURE:
-  /* The stream was opened successfully, so it must be closed here or
-   * the FILE is leaked along with the freed wrapper.
-   */
-  fclose(gsi->gsifp);
-  free(gsi);
-  return NULL;
-}
-
-/* Function: GSIGetRecord()
- *
- * Purpose:  Read one record at the current file position:
- *           a GSI_KEYSIZE-byte character field, a 16-bit field and
- *           a 32-bit field. Any field the caller does not want is
- *           skipped with fseek() instead of being read. The two
- *           integer fields are converted from network byte order
- *           only after all requested reads have succeeded.
- *
- * Args:     gsi - open GSI index file, correctly positioned at a record
- *           f1  - char[GSI_KEYSIZE], allocated by caller (or NULL if unwanted)
- *           f2  - pointer to 16-bit field (or NULL if unwanted)
- *           f3  - pointer to 32-bit field (or NULL if unwanted)
- *
- * Return:   1 on success.
- *           0 on a failed read, with squid_errno set to SQERR_NODATA.
- */
-int
-GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3)
-{
-  FILE *fp = gsi->gsifp;
-
-  if (f1 != NULL)
-    {
-      if (! fread(f1, GSI_KEYSIZE, 1, fp))
-        { squid_errno = SQERR_NODATA; return 0; }
-    }
-  else fseek(fp, GSI_KEYSIZE, SEEK_CUR);
-
-  if (f2 != NULL)
-    {
-      if (! fread(f2, sizeof(sqd_uint16), 1, fp))
-        { squid_errno = SQERR_NODATA; return 0; }
-    }
-  else fseek(fp, sizeof(sqd_uint16), SEEK_CUR);
-
-  if (f3 != NULL)
-    {
-      if (! fread(f3, sizeof(sqd_uint32), 1, fp))
-        { squid_errno = SQERR_NODATA; return 0; }
-    }
-  else fseek(fp, sizeof(sqd_uint32), SEEK_CUR);
-
-  /* byte-swap only once the whole record has been consumed */
-  if (f2 != NULL) *f2 = sre_ntoh16(*f2);
-  if (f3 != NULL) *f3 = sre_ntoh32(*f3);
-
-  return 1;
-}
-
-
-/* Function: GSIGetOffset()
- *
- * Purpose:  From a key (sequence name), find a disk offset
- *           in an open general sequence index file by binary
- *           search over the key records, which occupy record
- *           numbers nfiles+1 .. nfiles+recnum. Presumably GSI
- *           indexing could be even faster if we used hashing.
- *
- * Args:     gsi         - GSI index file, opened by GSIOpen()
- *           key         - name of key to retrieve indices for
- *           ret_seqfile - pre-alloced char[32] array for seqfile name
- *           ret_format  - RETURN: format of seqfile
- *           ret_offset  - RETURN: disk offset in seqfile.
- *
- * Return:   1 if the key was found and all three results were set.
- *           0 if the key is not in the index, the index holds no key
- *           records, or a seek/read on the index file failed; the
- *           ret_* arguments are then not meaningful.
- */
-int
-GSIGetOffset(GSIFILE *gsi, char *key, char *ret_seqfile,
-             int *ret_format, long *ret_offset)
-{
-  sqd_uint32 left, right, mid;
-  int        cmp;
-  int        found = 0;
-  char       name[GSI_KEYSIZE + 1];
-  sqd_uint32 offset  = 0;
-  sqd_uint16 filenum = 0;
-  sqd_uint32 fmt;
-
-  name[GSI_KEYSIZE] = '\0';     /* stored keys may fill all GSI_KEYSIZE bytes */
-
-  left  = gsi->nfiles + 1;
-  right = gsi->nfiles + gsi->recnum;
-
-  /* Closed-interval binary search. left >= 1 always holds, so the
-   * unsigned "mid - 1" below cannot wrap, and an empty index
-   * (recnum == 0) never enters the loop.
-   */
-  while (left <= right)
-    {
-      mid = left + (right - left) / 2;
-      if (fseek(gsi->gsifp, (long) mid * GSI_RECSIZE, SEEK_SET) != 0)
-        { squid_errno = SQERR_NODATA; return 0; }
-      /* a failed read must not be mistaken for a hit */
-      if (! GSIGetRecord(gsi, name, &filenum, &offset)) return 0;
-
-      cmp = strcmp(name, key);
-      if (cmp == 0) { found = 1; break; } /* found it! */
-      else if (cmp < 0) left  = mid + 1;  /* it's right of mid */
-      else              right = mid - 1;  /* it's left of mid */
-    }
-  if (! found) return 0;        /* oops, missed it; fail. */
-
-  /* Using file number, look up the sequence file and format.
-   */
-  if (fseek(gsi->gsifp, (long) filenum * GSI_RECSIZE, SEEK_SET) != 0)
-    { squid_errno = SQERR_NODATA; return 0; }
-  if (! GSIGetRecord(gsi, ret_seqfile, NULL, &fmt)) return 0;
-  *ret_format = (int) fmt;
-  *ret_offset = (long) offset;
-
-  return 1;
-}
-
-/* Function: GSIClose()
- *
- * Purpose:  Close an open GSI sequence index file and free its
- *           GSIFILE wrapper. A NULL argument is tolerated, so the
- *           result of a failed GSIOpen() can be passed straight in.
- *
- * Args:     gsi - index file from GSIOpen(), or NULL
- */
-void
-GSIClose(GSIFILE *gsi)
-{
-  if (gsi == NULL) return;      /* tolerate a NULL, as GKIFree() does */
-  fclose(gsi->gsifp);
-  free(gsi);
-}
-
-
-/*****************************************************************
- * GSI index construction routines
- * SRE, Wed Nov 10 11:49:14 1999 [St. Louis]
- *
- * API:
- * g = GSIAllocIndex();
- *
- * [foreach filename, <32 char, no directory path]
- * GSIAddFileToIndex(g, filename, fmt);
- * filenum++;
- * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 32bit offset]
- * GSIAddKeyToIndex(g, key, filenum, offset);
- *
- * GSISortIndex(g);
- * GSIWriteIndex(fp, g);
- * GSIFreeIndex(g);
- *****************************************************************/
-/* GSIAllocIndex(): create an empty in-memory index with initial room
- * for 10 file names/formats and 100 key records. Free with
- * GSIFreeIndex().
- */
-struct gsiindex_s *
-GSIAllocIndex(void)
-{
-  struct gsiindex_s *idx = MallocOrDie(sizeof(struct gsiindex_s));
-
-  idx->filenames = MallocOrDie(sizeof(char *) * 10);
-  idx->fmt       = MallocOrDie(sizeof(int) * 10);
-  idx->elems     = MallocOrDie(sizeof(struct gsikey_s) * 100);
-  idx->nfiles    = 0;
-  idx->nkeys     = 0;
-  return idx;
-}
-/* GSIFreeIndex(): release every stored file name, the three arrays,
- * and the index structure itself.
- */
-void
-GSIFreeIndex(struct gsiindex_s *g)
-{
-  int which = 0;
-
-  while (which < g->nfiles)
-    {
-      free(g->filenames[which]);
-      which++;
-    }
-  free(g->filenames);
-  free(g->fmt);
-  free(g->elems);
-  free(g);
-}
-/* GSIAddFileToIndex(): append a copy of <filename> and its format code
- * <fmt> to the index. Names of GSI_KEYSIZE characters or more are
- * rejected via Die(). The two per-file arrays are grown by 10 entries
- * whenever the file count reaches a multiple of 10.
- */
-void
-GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt)
-{
-  int namelen = strlen(filename);
-
-  if (namelen >= GSI_KEYSIZE) Die("File name too long to be indexed.");
-
-  g->filenames[g->nfiles] = sre_strdup(filename, namelen);
-  g->fmt[g->nfiles]       = fmt;
-  g->nfiles++;
-
-  if (g->nfiles % 10 != 0) return; /* still room in the current allocation */
-
-  g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10));
-  g->fmt       = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10));
-}
-/* GSIAddKeyToIndex(): append one key record (key, file number, offset)
- * to the index. Values that do not fit the on-disk field widths are
- * rejected via Die(). The record array is grown by 100 entries
- * whenever the key count reaches a multiple of 100.
- */
-void
-GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset)
-{
-  struct gsikey_s *rec;
-
-  if (strlen(key) >= GSI_KEYSIZE) Die("key too long in GSI index");
-  if (filenum > SQD_UINT16_MAX)   Die("too many files in GSI index");
-  if (offset > SQD_UINT32_MAX)    Die("offset too big in GSI index");
-
-  rec = &(g->elems[g->nkeys]);
-  strncpy(rec->key, key, GSI_KEYSIZE-1);
-  rec->key[GSI_KEYSIZE-1] = '\0';
-  rec->filenum = (sqd_uint16) filenum;
-  rec->offset  = (sqd_uint32) offset;
-  g->nkeys++;
-
-  if (g->nkeys % 100 != 0) return; /* still room in the current allocation */
-
-  g->elems = ReallocOrDie(g->elems, sizeof(struct gsikey_s) * (g->nkeys + 100));
-}
-/* gsi_keysorter(): qsort() comparison callback; orders two
- * struct gsikey_s records by strcmp() of their key fields.
- */
-static int
-gsi_keysorter(const void *k1, const void *k2)
-{
-  const struct gsikey_s *lhs = k1;
-  const struct gsikey_s *rhs = k2;
-
-  return strcmp(lhs->key, rhs->key);
-}
-/* GSISortIndex(): sort the index's key records in place into the order
- * defined by gsi_keysorter().
- */
-void
-GSISortIndex(struct gsiindex_s *g)
-{
-  qsort(g->elems, g->nkeys, sizeof(g->elems[0]), gsi_keysorter);
-}
-/* GSIWriteIndex(): write a complete GSI file to <fp>: the header
- * record, then one file record per indexed file (numbered from 1),
- * then one key record per key, in the order currently held in
- * g->elems. Counts too large for the on-disk fields are rejected
- * via Die().
- */
-void
-GSIWriteIndex(FILE *fp, struct gsiindex_s *g)
-{
-  sqd_uint32 i;
-
-  /* Range checking.
-   */
-  if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI index.");
-  if (g->nkeys > SQD_UINT32_MAX)  Die("Too many keys in GSI index.");
-
-  GSIWriteHeader(fp, g->nfiles, g->nkeys);
-
-  i = 0;
-  while (i < g->nfiles)
-    {
-      GSIWriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]);
-      i++;
-    }
-
-  i = 0;
-  while (i < g->nkeys)
-    {
-      GSIWriteKeyRecord(fp, g->elems[i].key, g->elems[i].filenum, g->elems[i].offset);
-      i++;
-    }
-}
-
-
-
-
-
-/* Function: GSIWriteHeader()
- * Date:     SRE, Wed Aug 5 10:36:02 1998 [St. Louis]
- *
- * Purpose:  Write the first record to an open GSI file:
- *           "GSI" <nfiles> <nkeys>
- *           The key field is the string "GSI" padded with NUL bytes
- *           to GSI_KEYSIZE; both counts are written in network
- *           byte order.
- *
- * Args:     fp     - open file to write to.
- *           nfiles - number of files indexed
- *           nkeys  - number of keys indexed
- *
- * Returns:  void. Out-of-range counts are reported via Die(); a
- *           short write invokes PANIC.
- */
-void
-GSIWriteHeader(FILE *fp, int nfiles, long nkeys)
-{
-  char       key[GSI_KEYSIZE];
-  sqd_uint16 f1;
-  sqd_uint32 f2;
-
-  /* beware potential range errors!
-   */
-  if (nfiles > SQD_UINT16_MAX) Die("GSI: nfiles out of range");
-  if (nkeys > SQD_UINT32_MAX)  Die("GSI: nkeys out of range");
-
-  f1 = (sqd_uint16) nfiles;
-  f2 = (sqd_uint32) nkeys;
-  f1 = sre_hton16(f1);
-  f2 = sre_hton32(f2);
-  /* Zero the whole field first: all GSI_KEYSIZE bytes go to disk, and
-   * strcpy() alone would leave the tail as uninitialized stack data.
-   */
-  memset(key, 0, sizeof(key));
-  strcpy(key, "GSI");
-
-  if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
-  if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
-  if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
-}
-
-
-/* Function: GSIWriteFileRecord()
- * Date:     SRE, Wed Aug 5 10:45:51 1998 [St. Louis]
- *
- * Purpose:  Write a file record to an open GSI file: the file
- *           name padded with NUL bytes to GSI_KEYSIZE, then the file
- *           number and format code in network byte order.
- *
- * Args:     fp    - open GSI file
- *           fname - file name (max 31 characters)
- *           idx   - file number
- *           fmt   - file format (e.g. kPearson, etc.)
- *
- * Returns:  0 if fname is too long to fit. 1 on success.
- *           Out-of-range numbers are reported via Die(); a short
- *           write invokes PANIC.
- */
-int
-GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt)
-{
-  char       keybuf[GSI_KEYSIZE];
-  size_t     len;
-  sqd_uint16 f1;
-  sqd_uint32 f2;
-
-  len = strlen(fname);
-  if (len >= GSI_KEYSIZE)   return 0;
-  if (idx > SQD_UINT16_MAX) Die("GSI: file index out of range");
-  if (fmt > SQD_UINT32_MAX) Die("GSI: format index out of range");
-
-  f1 = (sqd_uint16) idx;
-  f2 = (sqd_uint32) fmt;
-  f1 = sre_hton16(f1);
-  f2 = sre_hton32(f2);
-
-  /* fname may live in a buffer of only len+1 bytes, so writing
-   * GSI_KEYSIZE bytes straight from it would read past its end.
-   * Stage it in a zero-padded field instead.
-   */
-  memset(keybuf, 0, sizeof(keybuf));
-  memcpy(keybuf, fname, len);
-
-  if (fwrite(keybuf, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
-  if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
-  if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
-  return 1;
-}
-
-
-/* Function: GSIWriteKeyRecord()
- * Date:     SRE, Wed Aug 5 10:52:30 1998 [St. Louis]
- *
- * Purpose:  Write a key record to a GSI file: the key padded with
- *           NUL bytes to GSI_KEYSIZE, then the file number and
- *           offset in network byte order.
- *
- * Args:     fp      - open GSI file for writing
- *           key     - key (max 31 char + \0)
- *           fileidx - which file number to find this key in
- *           offset  - offset for this key
- *
- * Returns:  1 on success, else 0.
- *           will fail if key >= 32 chars, for instance.
- *           Out-of-range numbers are reported via Die(); a short
- *           write invokes PANIC.
- */
-int
-GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset)
-{
-  char       keybuf[GSI_KEYSIZE];
-  size_t     len;
-  sqd_uint16 f1;
-  sqd_uint32 f2;
-
-  len = strlen(key);
-  if (len >= GSI_KEYSIZE)       return 0;
-  if (fileidx > SQD_UINT16_MAX) Die("GSI: file index out of range");
-  if (offset > SQD_UINT32_MAX)  Die("GSI: offset out of range");
-
-  f1 = (sqd_uint16) fileidx;
-  f2 = (sqd_uint32) offset;
-  f1 = sre_hton16(f1);
-  f2 = sre_hton32(f2);
-
-  /* key may live in a buffer of only len+1 bytes, so writing
-   * GSI_KEYSIZE bytes straight from it would read past its end.
-   * Stage it in a zero-padded field instead.
-   */
-  memset(keybuf, 0, sizeof(keybuf));
-  memcpy(keybuf, key, len);
-
-  if (fwrite(keybuf, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
-  if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
-  if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
-  return 1;
-}
-
diff --git a/squid/gsi.h b/squid/gsi.h
deleted file mode 100644
index 8329076..0000000
--- a/squid/gsi.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef GSIH_INCLUDED
-#define GSIH_INCLUDED
-
-/* gsi.h
- * Database indexing (GSI format support)
- * RCS $Id: gsi.h,v 1.3 2001/08/04 20:15:42 eddy Exp $
- *
- * A GSI (generic sequence index) file is composed of
- * recnum + nfiles + 1 records. Each record contains
- * three fields; key, file number, and disk offset.
- * Record 0 contains:
- * [ "GSI" ] [ nfiles ] [ recnum ]
- * Records 1..nfiles map file names to file numbers, and contain:
- * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ]
- * Records nfiles+1 to recnum+nfiles+1 provide disk offset
- * and file number indices for every key:
- * [ key ] [ file number ] [ offset]
- *
- * Because the file is binary, we take some (but not
- * complete) care to improve portability amongst platforms.
- * This means using network order integers (see ntohl())
- * and defining types for 16 and 32 bit integers.
- *
- * Because we use 32-bit offsets, ftell(), and fseek(),
- * there is an implicit 2 Gb file size maximum.
- * AFAIK neither ANSI C nor POSIX provide a portable solution
- * to this problem. fsetpos(), fgetpos() use an
- * opaque fpos_t datatype that we can't write portably
- * to a disk file. Suggestions welcomed.
- */
-#define GSI_KEYSIZE 32 /* keys are 32 bytes long */
-#define GSI_RECSIZE 38 /* 32 + 2 + 4 bytes */
-#define SQD_UINT16_MAX 65535 /* 2^16-1 */
-#define SQD_UINT32_MAX 4294967295U/* 2^32-1 */
-
-struct gsi_s {
- FILE *gsifp; /* open GSI index file */
- sqd_uint16 nfiles; /* number of files = 16 bit int */
- sqd_uint32 recnum; /* number of records = 32 bit int */
-};
-typedef struct gsi_s GSIFILE;
-
-struct gsikey_s {
- char key[GSI_KEYSIZE];
- sqd_uint16 filenum;
- sqd_uint32 offset;
-};
-struct gsiindex_s {
- char **filenames;
- int *fmt;
- sqd_uint16 nfiles;
-
- struct gsikey_s *elems;
- int nkeys;
-};
-
-
-/* from gsi.c
- */
-extern GSIFILE *GSIOpen(char *gsifile);
-extern int GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3);
-extern int GSIGetOffset(GSIFILE *gsi, char *key, char *sqfile,
- int *fmt, long *ret_offset);
-extern void GSIClose(GSIFILE *gsi);
-extern struct gsiindex_s *GSIAllocIndex(void);
-extern void GSIFreeIndex(struct gsiindex_s *g);
-extern void GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt);
-extern void GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset);
-extern void GSISortIndex(struct gsiindex_s *g);
-extern void GSIWriteIndex(FILE *fp, struct gsiindex_s *g);
-extern void GSIWriteHeader(FILE *fp, int nfiles, long nkeys);
-extern int GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt);
-extern int GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset);
-
-#endif /*GSIH_INCLUDED*/
diff --git a/squid/gsi64.c b/squid/gsi64.c
deleted file mode 100644
index bedc4a5..0000000
--- a/squid/gsi64.c
+++ /dev/null
@@ -1,397 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-
-/* gsi64.c
- * Updated interfaces for GSI64 64-bit "generic sequence index" files.
- * See gsi.c for old interfaces.
- * This is a temporary hack! Needed for human genome project.
- */
-
-/* 1 + <nfiles> + <nkeys> total records.
- * Each record = 42 bytes.
- *
- * one header record : <"GSI64" (32)> <nfiles (2)> <nkeys (8)>
- * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (8)>
- * <nkeys> key records : <key (32)> <fileno (2)> <offset(8)>
- *
- * CVS $Id: gsi64.c,v 1.3 2003/04/14 16:00:16 eddy Exp $
- */
-#include "squidconf.h"
-
-#ifdef USE_GSI64
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifndef SEEK_SET
-#include <unistd.h> /* needed for poor crippled SunOS */
-#endif
-
-#include "squid.h"
-#include "gsi64.h"
-
-/*****************************************************************
- * GSI64 index file access routines
- *****************************************************************/
-
-/* Function: GSI64Open()
- *
- * Purpose: Open a GSI64 file. Returns the number of records in
- * the file and a file pointer. Returns NULL on failure.
- * The file pointer should be fclose()'d normally.
- */
-GSI64FILE *
-GSI64Open(char *gsifile)
-{
- GSI64FILE *gsi;
- char magic[GSI64_KEYSIZE];
-
- gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE));
- if ((gsi->gsifp = fopen(gsifile, "r")) == NULL)
- { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }
-
- if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp))
- { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
- if (strcmp(magic, "GSI64") != 0)
- { free(gsi); squid_errno = SQERR_FORMAT; return NULL; }
-
- if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp))
- { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
- if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp))
- { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
-
-#if 0 /* HACK! we don't byteswap */
- gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */
- gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long */
-#endif
-
- return gsi;
-}
-
-/* Function: GSI64GetRecord()
- *
- * Purpose: Each non-header record of a GSI64 index file consists
- * of 42 bytes: 32 bytes of character string, a 2 byte
- * short, and an 8 byte long long. This function returns the
- * three values.
- *
- * Args: gsi - open GSI64 index file, correctly positioned at a record
- * f1 - char[32], allocated by caller (or NULL if unwanted)
- * f2 - pointer to short (or NULL if unwanted)
- * f3 - pointer to long long (or NULL if unwanted)
- *
- * Return: 0 on failure and sets squid_errno.
- */
-int
-GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3)
-{
- if (f1 == NULL) fseek64(gsi->gsifp, GSI64_KEYSIZE, SEEK_CUR);
- else if (! fread(f1, GSI64_KEYSIZE, 1, gsi->gsifp))
- { squid_errno = SQERR_NODATA; return 0; }
-
- if (f2 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR);
- else if (! fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp))
- { squid_errno = SQERR_NODATA; return 0; }
-
- if (f3 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint64), SEEK_CUR);
- else if (! fread(f3, sizeof(sqd_uint64), 1, gsi->gsifp))
- { squid_errno = SQERR_NODATA; return 0; }
-
-#if 0 /* no byteswap yet! HACK! */
- if (f2 != NULL) *f2 = sre_ntohs(*f2);
- if (f3 != NULL) *f3 = sre_ntohl(*f3);
-#endif
-
- return 1;
-}
-
-
-/* Function: GSI64GetOffset()
- *
- * Purpose: From a key (sequence name), find a disk offset
- * in an open general sequence index file by binary
- * search. Presumably GSI64 indexing could be even faster
- * if we used hashing.
- *
- * Args: gsi - GSI64 index file, opened by GSI64Open()
- * key - name of key to retrieve indices for
- * ret_seqfile - pre-alloced char[32] array for seqfile name
- * ret_fmt - format of seqfile
- * ret_offset - return: disk offset in seqfile.
- */
-int
-GSI64GetOffset(GSI64FILE *gsi, char *key, char *ret_seqfile,
- int *ret_format, long long *ret_offset)
-{
- sqd_uint64 left, right, mid;
- int cmp;
- char name[GSI64_KEYSIZE + 1];
- sqd_uint64 offset;
- sqd_uint16 filenum;
- sqd_uint64 fmt;
-
- name[GSI64_KEYSIZE] = '\0';
-
- left = gsi->nfiles + 1;
- right = gsi->nfiles + gsi->recnum;
- mid = (left + right) / 2;
- fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);
-
- while (GSI64GetRecord(gsi, name, &filenum, &offset))
- {
- cmp = strcmp(name, key);
- if (cmp == 0) break; /* found it! */
- else if (left >= right) return 0; /* oops, missed it; fail. */
- else if (cmp < 0) left = mid + 1; /* it's right of mid */
- else if (cmp > 0) right = mid - 1; /* it's left of mid */
- mid = (left + right) / 2;
- fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);
- }
-
- /* Using file number, look up the sequence file and format.
- */
- fseek64(gsi->gsifp, filenum * GSI64_RECSIZE, SEEK_SET);
- GSI64GetRecord(gsi, ret_seqfile, NULL, &fmt);
- *ret_format = (int) fmt;
- *ret_offset = (long long) offset;
-
- return 1;
-}
-
-/* Function: GSI64Close()
- *
- * Purpose: Close an open GSI64 sequence index file.
- */
-void
-GSI64Close(GSI64FILE *gsi)
-{
- fclose(gsi->gsifp);
- free(gsi);
-}
-
-
-/*****************************************************************
- * GSI64 index construction routines
- * SRE, Wed Nov 10 11:49:14 1999 [St. Louis]
- *
- * API:
- * g = GSI64AllocIndex();
- *
- * [foreach filename, <32 char, no directory path]
- * GSI64AddFileToIndex(g, filename);
- * filenum++;
- * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset]
- * GSI64AddKeyToIndex(g, key, filenum, offset);
- *
- * GSI64SortIndex(g);
- * GSI64WriteIndex(fp, g);
- * GSI64FreeIndex(g);
- *****************************************************************/
-struct gsi64index_s *
-GSI64AllocIndex(void)
-{
- struct gsi64index_s *g;
-
- g = MallocOrDie(sizeof(struct gsi64index_s));
- g->filenames = MallocOrDie(sizeof(char *) * 10);
- g->fmt = MallocOrDie(sizeof(int) * 10);
- g->elems = MallocOrDie(sizeof(struct gsi64key_s) * 100);
- g->nfiles = 0;
- g->nkeys = 0;
- return g;
-}
-void
-GSI64FreeIndex(struct gsi64index_s *g)
-{
- int i;
- for (i = 0; i < g->nfiles; i++) free(g->filenames[i]);
- free(g->filenames);
- free(g->fmt);
- free(g->elems);
- free(g);
-}
-void
-GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt)
-{
- int len;
-
- len = strlen(filename);
- if (len >= GSI64_KEYSIZE) Die("File name too long to be indexed.");
- g->filenames[g->nfiles] = sre_strdup(filename, len);
- g->fmt[g->nfiles] = fmt;
- g->nfiles++;
- if (g->nfiles % 10 == 0) {
- g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10));
- g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10));
- }
-}
-void
-GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset)
-{
- if (strlen(key) >= GSI64_KEYSIZE) Die("key too long in GSI64 index");
- if (filenum > SQD_UINT16_MAX) Die("too many files in GSI64 index");
- if (offset > SQD_UINT64_MAX) Die("offset too big in GSI64 index");
-
- strncpy(g->elems[g->nkeys].key, key, GSI64_KEYSIZE-1);
- g->elems[g->nkeys].key[GSI64_KEYSIZE-1] = '\0';
- g->elems[g->nkeys].filenum = (sqd_uint16) filenum;
- g->elems[g->nkeys].offset = (sqd_uint64) offset;
- g->nkeys++;
-
- if (g->nkeys % 100 == 0)
- g->elems = ReallocOrDie(g->elems, sizeof(struct gsi64key_s) * (g->nkeys + 100));
-}
-static int
-gsi_keysorter(const void *k1, const void *k2)
-{
- struct gsi64key_s *key1;
- struct gsi64key_s *key2;
- key1 = (struct gsi64key_s *) k1;
- key2 = (struct gsi64key_s *) k2;
- return strcmp(key1->key, key2->key);
-}
-void
-GSI64SortIndex(struct gsi64index_s *g)
-{
- qsort((void *) g->elems, g->nkeys, sizeof(struct gsi64key_s), gsi_keysorter);
-}
-void
-GSI64WriteIndex(FILE *fp, struct gsi64index_s *g)
-{
- sqd_uint16 i;
- sqd_uint64 j;
-
- /* Range checking.
- */
- if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI64 index.");
- if (g->nkeys > SQD_UINT64_MAX) Die("Too many keys in GSI64 index.");
-
- GSI64WriteHeader(fp, g->nfiles, g->nkeys);
- for (i = 0; i < g->nfiles; i++)
- GSI64WriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]);
- for (j = 0; j < g->nkeys; j++)
- GSI64WriteKeyRecord(fp, g->elems[j].key, g->elems[j].filenum, g->elems[j].offset);
-}
-
-
-
-
-
-/* Function: GSI64WriteHeader()
- * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis]
- *
- * Purpose: Write the first record to an open GSI64 file:
- * "GSI64" <nfiles> <nkeys>
- *
- * Args: fp - open file to write to.
- * nfiles - number of files indexed
- * nkeys - number of keys indexed
- *
- * Returns: void
- */
-void
-GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys)
-{
- char key[GSI64_KEYSIZE];
- sqd_uint16 f1;
- sqd_uint64 f2;
-
- /* beware potential range errors!
- */
- if (nfiles > SQD_UINT16_MAX) Die("GSI64: nfiles out of range");
- if (nkeys > SQD_UINT64_MAX) Die("GSI64: nkeys out of range");
-
- f1 = (sqd_uint16) nfiles;
- f2 = (sqd_uint64) nkeys;
-#if 0 /* HACK no byteswap */
- f1 = sre_htons(f1);
- f2 = sre_htonl(f2);
-#endif
- strcpy(key, "GSI64");
-
- if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
- if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
- if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
-}
-
-
-/* Function: GSI64WriteFileRecord()
- * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis]
- *
- * Purpose: Write a file record to an open GSI64 file.
- *
- * Args: fp - open GSI64 file
- * fname - file name (max 31 characters)
- * idx - file number
- * fmt - file format (e.g. kPearson, etc.)
- *
- * Returns: 0 on failure. 1 on success.
- */
-int
-GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt)
-{
- sqd_uint16 f1;
- sqd_uint64 f2;
-
- if (strlen(fname) >= GSI64_KEYSIZE) return 0;
- if (idx > SQD_UINT16_MAX) Die("GSI64: file index out of range");
- if (fmt > SQD_UINT64_MAX) Die("GSI64: format index out of range");
-
- f1 = (sqd_uint16) idx;
- f2 = (sqd_uint64) fmt;
-#if 0 /* hack : no byteswap */
- f1 = sre_htons(f1);
- f2 = sre_htonl(f2);
-#endif
-
- if (fwrite(fname, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
- if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
- if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
- return 1;
-}
-
-
-/* Function: GSI64WriteKeyRecord()
- * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis]
- *
- * Purpose: Write a key record to a GSI64 file.
- *
- * Args: fp - open GSI64 file for writing
- * key - key (max 31 char + \0)
- * fileidx - which file number to find this key in
- * offset - offset for this key
- *
- * Returns: 1 on success, else 0.
- * will fail if key >= 32 chars, for instance.
- */
-int
-GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset)
-{
- sqd_uint16 f1;
- sqd_uint64 f2;
-
- if (strlen(key) >= GSI64_KEYSIZE) return 0;
- if (fileidx > SQD_UINT16_MAX) Die("GSI64: file index out of range");
- if (offset > SQD_UINT64_MAX) Die("GSI64: offset out of range");
-
- f1 = (sqd_uint16) fileidx;
- f2 = (sqd_uint64) offset;
-#if 0 /* HACK! */
- f1 = sre_htons(f1);
- f2 = sre_htonl(f2);
-#endif
-
- if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
- if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
- if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
- return 1;
-}
-
-#endif /*USE_GSI64 */
diff --git a/squid/gsi64.h b/squid/gsi64.h
deleted file mode 100644
index f52b03e..0000000
--- a/squid/gsi64.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef GSI64H_INCLUDED
-#define GSI64H_INCLUDED
-#ifdef USE_GSI64
-
-/* gsi64.h
- * Database indexing (GSI64 format support)
- * CVS $Id: gsi64.h,v 1.2 2000/12/21 23:42:59 eddy Exp $
- *
- * A GSI64 (generic sequence index, 64 bit hack) file is composed of
- * recnum + nfiles + 1 records. Each record contains
- * three fields; key, file number, and disk offset.
- * Record 0 contains:
- * [ "GSI64" ] [ nfiles ] [ recnum ]
- * Records 1..nfiles map file names to file numbers, and contain:
- * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ]
- * Records nfiles+1 to recnum+nfiles+1 provide disk offset
- * and file number indices for every key:
- * [ key ] [ file number ] [ offset]
- *
- * Because the file is binary, we take some (but not
- * complete) care to improve portability amongst platforms.
- * This means using network order integers (see ntohl())
- * and defining types for 16 and 64 bit integers.
- *
- * A short test program that verifies the sizes of these
- * data types would be a good idea...
- *
- * Because we use 64-bit offsets, ftell64(), and fseek64(),
- * we rely on the OS actually providing these. This is
- * a temporary hack for human genome analysis.
- */
-typedef unsigned long long sqd_uint64; /* 64 bit integer. */
-
-#define GSI64_KEYSIZE 32 /* keys are 32 bytes long */
-#define GSI64_RECSIZE 42 /* 32 + 2 + 8 bytes */
-#define SQD_UINT16_MAX 65535 /* 2^16-1 */
-#define SQD_UINT64_MAX 18446744073709551615LU /* 2^64-1 */
-
-struct gsi64_s {
- FILE *gsifp; /* open GSI index file */
- sqd_uint16 nfiles; /* number of files = 16 bit int */
- sqd_uint64 recnum; /* number of records = 64 bit int */
-};
-typedef struct gsi64_s GSI64FILE;
-
-struct gsi64key_s {
- char key[GSI64_KEYSIZE];
- sqd_uint16 filenum;
- sqd_uint64 offset;
-};
-struct gsi64index_s {
- char **filenames;
- int *fmt;
- sqd_uint16 nfiles;
-
- struct gsi64key_s *elems;
- sqd_uint64 nkeys;
-};
-
-
-
-/* if ntohl() and friends are not available, you
- * can slip replacements in by providing sre_ntohl()
- * functions. (i.e., there is a possible portability problem here.)
- */
-#if 0
-#define sre_ntohl(x) ntohl(x);
-#define sre_ntohs(x) ntohs(x);
-#define sre_htonl(x) htonl(x);
-#define sre_htons(x) htons(x);
-#endif
-
-/* from gsi64.c
- */
-extern GSI64FILE *GSI64Open(char *gsifile);
-extern int GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3);
-extern int GSI64GetOffset(GSI64FILE *gsi, char *key, char *sqfile,
- int *fmt, long long *ret_offset);
-extern void GSI64Close(GSI64FILE *gsi);
-extern struct gsi64index_s *GSI64AllocIndex(void);
-extern void GSI64FreeIndex(struct gsi64index_s *g);
-extern void GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt);
-extern void GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset);
-extern void GSI64SortIndex(struct gsi64index_s *g);
-extern void GSI64WriteIndex(FILE *fp, struct gsi64index_s *g);
-extern void GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys);
-extern int GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt);
-extern int GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset);
-
-#endif /* USE_GSI64 */
-#endif /*GSIH_INCLUDED*/
diff --git a/squid/hsregex.c b/squid/hsregex.c
deleted file mode 100644
index 361fd0c..0000000
--- a/squid/hsregex.c
+++ /dev/null
@@ -1,1361 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/*****************************************************************
- * This code is an altered version of Henry Spencer's
- * regex library. Alterations are limited to minor streamlining,
- * and some name changes to protect the SQUID namespace.
- * Henry's copyright notice appears below.
- * You can obtain the original from
- * ftp://ftp.zoo.toronto.edu/pub/bookregex.tar.Z
- * Thanks, Henry!
- *
- * The magic word for compiling a testdriver: NBA_TEAM_IN_STL
- * gcc -o test -g -DNBA_TEAM_IN_STL -L. hsregex.c -lsquid -lm
- *
- * Usage:
- * test <pattern> <ntok> <string>
- *
- * SRE, Fri Aug 28 11:10:17 1998
- * CVS $Id: hsregex.c,v 1.9 2003/10/04 18:26:49 eddy Exp $
- *****************************************************************/
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-
-/* global sqd_parse[] are managed by Strparse().
- * WARNING: TODO: this code is not threadsafe, and needs to be revised.
- */
-char *sqd_parse[10];
-
-/* Function: Strparse()
- *
- * Purpose: Match a regexp to a string. Returns 1 if pattern matches,
- * else 0.
- *
- * Much like Perl, Strparse() makes copies of the matching
- * substrings available via globals, sqd_parse[].
- * sqd_parse[0] contains a copy of the complete matched
- * text. sqd_parse[1-9] contain copies of up to nine
- * different substrings matched within parentheses.
- * The memory for these strings is internally managed and
- * volatile; the next call to Strparse() may destroy them.
- * If the caller needs the matched substrings to persist
- * beyond a new Strparse() call, it must make its own
- * copies.
- *
- * A minor drawback of the memory management is that
- * there will be a small amount of unfree'd memory being
- * managed by Strparse() when a program exits; this may
- * confuse memory debugging (Purify, dbmalloc). The
- * general cleanup function SqdClean() is provided;
- * you can call this before exiting.
- *
- * Uses an extended POSIX regular expression interface.
- * A copylefted GNU implementation is included in the squid
- * implementation (gnuregex.c) for use on non-POSIX compliant
- * systems. POSIX 1003.2-compliant systems (all UNIX,
- * some WinNT, I believe) can omit the GNU code if necessary.
- *
- * I built this for ease of use, not speed nor efficiency.
- *
- * Example: Strparse("foo-...-baz", "foo-bar-baz") returns 0
- * Strparse("foo-(...)-baz", "foo-bar-baz")
- * returns 0; sqd_parse[0] is "foo-bar-baz";
- * sqd_parse[1] is "bar".
- *
- * A real example:
- * s = ">gnl|ti|3 G10P69425RH2.T0 {SUB 81..737} /len=657"
- * pat = "SUB ([0-9]+)"
- * Strparse(pat, s, 1)
- * returns 1; sqd_parse[1] is "81".
- *
- * Args: rexp - regular expression, extended POSIX form
- * s - string to match against
- * ntok - number of () substrings we will save (maximum NSUBEXP-1)
- *
- * Return: 1 on match, 0 if no match
- */
-int
-Strparse(char *rexp, char *s, int ntok)
-{
- sqd_regexp *pat;
- int code;
- int len;
- int i;
- /* sanity check */
- if (ntok >= NSUBEXP ) Die("Strparse(): ntok must be <= %d", NSUBEXP-1);
-
- /* Free previous global substring buffers
- */
- for (i = 0; i <= ntok; i++)
- if (sqd_parse[i] != NULL)
- {
- free(sqd_parse[i]);
- sqd_parse[i] = NULL;
- }
-
- /* Compile and match the pattern, using our modified
- * copy of Henry Spencer's regexp library
- */
- if ((pat = sqd_regcomp(rexp)) == NULL)
- Die("regexp compilation failed.");
- code = sqd_regexec(pat, s);
-
- /* Fill the global substring buffers
- */
- if (code == 1)
- for (i = 0; i <= ntok; i++)
- if (pat->startp[i] != NULL && pat->endp[i] != NULL)
- {
- len = pat->endp[i] - pat->startp[i];
- sqd_parse[i] = (char *) MallocOrDie(sizeof(char) * (len+1));
- strncpy(sqd_parse[i], pat->startp[i], len);
- sqd_parse[i][len] = '\0';
- }
-
- free(pat);
- return code;
-}
-
-/* Function: SqdClean()
- * Date: SRE, Wed Oct 29 12:52:08 1997 [TWA 721]
- *
- * Purpose: Clean up any squid library allocations before exiting
- * a program, so we don't leave unfree'd memory around
- * and confuse a malloc debugger like Purify or dbmalloc.
- */
-void
-SqdClean(void)
-{
- int i;
-
- /* Free global substring buffers that Strparse() uses
- */
- for (i = 0; i <= 9; i++)
- if (sqd_parse[i] != NULL) {
- free(sqd_parse[i]);
- sqd_parse[i] = NULL;
- }
-}
-
-
-
-/* all code below is:
- * Copyright (c) 1986, 1993, 1995 by University of Toronto.
- * Written by Henry Spencer. Not derived from licensed software.
- *
- * Permission is granted to anyone to use this software for any
- * purpose on any computer system, and to redistribute it in any way,
- * subject to the following restrictions:
- *
- * 1. The author is not responsible for the consequences of use of
- * this software, no matter how awful, even if they arise
- * from defects in it.
- *
- * 2. The origin of this software must not be misrepresented, either
- * by explicit claim or by omission.
- *
- * 3. Altered versions must be plainly marked as such, and must not
- * be misrepresented (by explicit claim or omission) as being
- * the original software.
- *
- * 4. This notice must not be removed or altered.
- */
-
-/*
- * sqd_regcomp and sqd_regexec -- sqd_regsub and sqd_regerror are elsewhere
- */
-
-/*
- * The first byte of the regexp internal "program" is actually this magic
- * number; the start node begins in the second byte.
- */
-#define SQD_REGMAGIC 0234
-
-/*
- * The "internal use only" fields in regexp.h are present to pass info from
- * compile to execute that permits the execute phase to run lots faster on
- * simple cases. They are:
- *
- * regstart char that must begin a match; '\0' if none obvious
- * reganch is the match anchored (at beginning-of-line only)?
- * regmust string (pointer into program) that match must include, or NULL
- * regmlen length of regmust string
- *
- * Regstart and reganch permit very fast decisions on suitable starting points
- * for a match, cutting down the work a lot. Regmust permits fast rejection
- * of lines that cannot possibly match. The regmust tests are costly enough
- * that sqd_regcomp() supplies a regmust only if the r.e. contains something
- * potentially expensive (at present, the only such thing detected is * or +
- * at the start of the r.e., which can involve a lot of backup). Regmlen is
- * supplied because the test in sqd_regexec() needs it and sqd_regcomp() is computing
- * it anyway.
- */
-
-/*
- * Structure for regexp "program". This is essentially a linear encoding
- * of a nondeterministic finite-state machine (aka syntax charts or
- * "railroad normal form" in parsing technology). Each node is an opcode
- * plus a "next" pointer, possibly plus an operand. "Next" pointers of
- * all nodes except BRANCH implement concatenation; a "next" pointer with
- * a BRANCH on both ends of it is connecting two alternatives. (Here we
- * have one of the subtle syntax dependencies: an individual BRANCH (as
- * opposed to a collection of them) is never concatenated with anything
- * because of operator precedence.) The operand of some types of node is
- * a literal string; for others, it is a node leading into a sub-FSM. In
- * particular, the operand of a BRANCH node is the first node of the branch.
- * (NB this is *not* a tree structure: the tail of the branch connects
- * to the thing following the set of BRANCHes.) The opcodes are:
- */
-
-/* definition number opnd? meaning */
-#define END 0 /* no End of program. */
-#define BOL 1 /* no Match beginning of line. */
-#define EOL 2 /* no Match end of line. */
-#define ANY 3 /* no Match any character. */
-#define ANYOF 4 /* str Match any of these. */
-#define ANYBUT 5 /* str Match any but one of these. */
-#define BRANCH 6 /* node Match this, or the next..\&. */
-#define BACK 7 /* no "next" ptr points backward. */
-#define EXACTLY 8 /* str Match this string. */
-#define NOTHING 9 /* no Match empty string. */
-#define STAR 10 /* node Match this 0 or more times. */
-#define PLUS 11 /* node Match this 1 or more times. */
-#define OPEN 20 /* no Sub-RE starts here. */
- /* OPEN+1 is number 1, etc. */
-#define CLOSE 30 /* no Analogous to OPEN. */
-
-/*
- * Opcode notes:
- *
- * BRANCH The set of branches constituting a single choice are hooked
- * together with their "next" pointers, since precedence prevents
- * anything being concatenated to any individual branch. The
- * "next" pointer of the last BRANCH in a choice points to the
- * thing following the whole choice. This is also where the
- * final "next" pointer of each individual branch points; each
- * branch starts with the operand node of a BRANCH node.
- *
- * BACK Normal "next" pointers all implicitly point forward; BACK
- * exists to make loop structures possible.
- *
- * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
- * BRANCH structures using BACK. Simple cases (one character
- * per match) are implemented with STAR and PLUS for speed
- * and to minimize recursive plunges.
- *
- * OPEN,CLOSE ...are numbered at compile time.
- */
-
-/*
- * A node is one char of opcode followed by two chars of "next" pointer.
- * "Next" pointers are stored as two 8-bit pieces, high order first. The
- * value is a positive offset from the opcode of the node containing it.
- * An operand, if any, simply follows the node. (Note that much of the
- * code generation knows about this implicit relationship.)
- *
- * Using two bytes for the "next" pointer is vast overkill for most things,
- * but allows patterns to get big without disasters.
- */
-#define OP(p) (*(p))
-#define NEXT(p) (((*((p)+1)&0177)<<8) + (*((p)+2)&0377))
-#define OPERAND(p) ((p) + 3)
-
-/*
- * Utility definitions.
- */
-#define FAIL(m) { sqd_regerror(m); return(NULL); }
-#define ISREPN(c) ((c) == '*' || (c) == '+' || (c) == '?')
-#define META "^$.[()|?+*\\"
-
-/*
- * Flags to be passed up and down.
- */
-#define HASWIDTH 01 /* Known never to match null string. */
-#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
-#define SPSTART 04 /* Starts with * or +. */
-#define WORST 0 /* Worst case. */
-
-/*
- * Work-variable struct for sqd_regcomp().
- */
-struct comp {
- char *regparse; /* Input-scan pointer. */
- int regnpar; /* () count. */
- char *regcode; /* Code-emit pointer; &regdummy = don't. */
- char regdummy[3]; /* NOTHING, 0 next ptr */
- long regsize; /* Code size. */
-};
-#define EMITTING(cp) ((cp)->regcode != (cp)->regdummy)
-
-/*
- * Forward declarations for sqd_regcomp()'s friends.
- */
-static char *reg(struct comp *cp, int paren, int *flagp);
-static char *regbranch(struct comp *cp, int *flagp);
-static char *regpiece(struct comp *cp, int *flagp);
-static char *regatom(struct comp *cp, int *flagp);
-static char *regnode(struct comp *cp, int op);
-static char *regnext(char *node);
-static void regc(struct comp *cp, int c);
-static void reginsert(struct comp *cp, int op, char *opnd);
-static void regtail(struct comp *cp, char *p, char *val);
-static void regoptail(struct comp *cp, char *p, char *val);
-
-/*
- - sqd_regcomp - compile a regular expression into internal code
- *
- * We can't allocate space until we know how big the compiled form will be,
- * but we can't compile it (and thus know how big it is) until we've got a
- * place to put the code.  So we cheat:  we compile it twice, once with code
- * generation turned off and size counting turned on, and once "for real".
- * This also means that we don't allocate space until we are sure that the
- * thing really will compile successfully, and we never have to move the
- * code and thus invalidate pointers into it.  (Note that it has to be in
- * one piece because free() must be able to free it all.)
- *
- * Beware that the optimization-preparation code in here knows about some
- * of the structure of the compiled regexp.
- *
- * exp - the pattern text; it is only read.
- *
- * Returns the compiled program in a single malloc() block, or NULL when
- * either compilation pass fails.  If the second pass fails, the block is
- * released before returning so nothing is leaked.
- */
-sqd_regexp *
-sqd_regcomp(exp)
-const char *exp;
-{
-    register sqd_regexp *r;
-    register char *scan;
-    int flags;
-    struct comp co;
-
-    if (exp == NULL)
-        FAIL("NULL argument to sqd_regcomp");
-
-    /* First pass: determine size, legality. */
-    co.regparse = (char *)exp;
-    co.regnpar = 1;
-    co.regsize = 0L;
-    co.regdummy[0] = NOTHING;
-    co.regdummy[1] = co.regdummy[2] = 0;
-    co.regcode = co.regdummy;
-    regc(&co, SQD_REGMAGIC);
-    if (reg(&co, 0, &flags) == NULL)
-        return(NULL);
-
-    /* Small enough for pointer-storage convention? */
-    if (co.regsize >= 0x7fffL)    /* Probably could be 0xffffL. */
-        FAIL("regexp too big");
-
-    /* Allocate space. */
-    r = (sqd_regexp *)malloc(sizeof(sqd_regexp) + (size_t)co.regsize);
-    if (r == NULL)
-        FAIL("out of space");
-
-    /* Second pass: emit code. */
-    co.regparse = (char *)exp;
-    co.regnpar = 1;
-    co.regcode = r->program;
-    regc(&co, SQD_REGMAGIC);
-    if (reg(&co, 0, &flags) == NULL) {
-        free(r);    /* Nobody else holds r yet; don't leak it. */
-        return(NULL);
-    }
-
-    /* Dig out information for optimizations. */
-    r->regstart = '\0';    /* Worst-case defaults. */
-    r->reganch = 0;
-    r->regmust = NULL;
-    r->regmlen = 0;
-    scan = r->program+1;    /* First BRANCH. */
-    if (OP(regnext(scan)) == END) {    /* Only one top-level choice. */
-        scan = OPERAND(scan);
-
-        /* Starting-point info. */
-        if (OP(scan) == EXACTLY)
-            r->regstart = *OPERAND(scan);
-        else if (OP(scan) == BOL)
-            r->reganch = 1;
-
-        /*
-         * If there's something expensive in the r.e., find the
-         * longest literal string that must appear and make it the
-         * regmust.  Resolve ties in favor of later strings, since
-         * the regstart check works with the beginning of the r.e.
-         * and avoiding duplication strengthens checking.  Not a
-         * strong reason, but sufficient in the absence of others.
-         */
-        if (flags&SPSTART) {
-            register char *longest = NULL;
-            register size_t len = 0;
-
-            for (; scan != NULL; scan = regnext(scan)) {
-                register size_t n;
-
-                if (OP(scan) != EXACTLY)
-                    continue;
-                n = strlen(OPERAND(scan));
-                if (n >= len) {    /* >= so that later strings win ties. */
-                    longest = OPERAND(scan);
-                    len = n;
-                }
-            }
-            r->regmust = longest;
-            r->regmlen = (int)len;
-        }
-    }
-
-    return(r);
-}
-
-/*
- - reg - regular expression, i.e. main body or parenthesized thing
- *
- * Caller must absorb opening parenthesis.
- *
- * Combining parenthesis handling with the base level of regular expression
- * is a trifle forced, but the need to tie the tails of the branches to what
- * follows makes it hard to avoid.
- *
- * cp    - compile state; cp->regparse is advanced over what is consumed.
- * paren - nonzero when compiling the inside of a ( ) group.
- * flagp - out: starts as HASWIDTH, which is cleared as soon as one branch
- *         lacks it; SPSTART is set if any branch reports it.
- *
- * Returns the OPEN node when paren is set, otherwise the first BRANCH
- * node.  Returns NULL when a branch fails to compile.
- */
-static char *
-reg(cp, paren, flagp)
-register struct comp *cp;
-int paren; /* Parenthesized? */
-int *flagp;
-{
- register char *ret = NULL; /* SRE: NULL init added to silence gcc */
- register char *br;
- register char *ender;
- register int parno = 0; /* SRE: init added to silence gcc */
- int flags;
-
- *flagp = HASWIDTH; /* Tentatively. */
-
- if (paren) {
- /* Make an OPEN node. */
- if (cp->regnpar >= NSUBEXP)
- FAIL("too many ()");
- parno = cp->regnpar;
- cp->regnpar++;
- ret = regnode(cp, OPEN+parno);
- }
-
- /* Pick up the branches, linking them together. */
- br = regbranch(cp, &flags);
- if (br == NULL)
- return(NULL);
- if (paren)
- regtail(cp, ret, br); /* OPEN -> first. */
- else
- ret = br;
- *flagp &= ~(~flags&HASWIDTH); /* Clear bit if bit 0. */
- *flagp |= flags&SPSTART;
- while (*cp->regparse == '|') {
- cp->regparse++;
- br = regbranch(cp, &flags);
- if (br == NULL)
- return(NULL);
- regtail(cp, ret, br); /* BRANCH -> BRANCH. */
- *flagp &= ~(~flags&HASWIDTH);
- *flagp |= flags&SPSTART;
- }
-
- /* Make a closing node, and hook it on the end. */
- ender = regnode(cp, (paren) ? CLOSE+parno : END);
- regtail(cp, ret, ender);
-
- /* Hook the tails of the branches to the closing node. */
- for (br = ret; br != NULL; br = regnext(br))
- regoptail(cp, br, ender);
-
- /* Check for proper termination. */
- /* Note: the ')' test also consumes the character it examines. */
- if (paren && *cp->regparse++ != ')') {
- FAIL("unterminated ()");
- } else if (!paren && *cp->regparse != '\0') {
- if (*cp->regparse == ')') {
- FAIL("unmatched ()");
- } else
- FAIL("internal error: junk on end");
- /* NOTREACHED */
- }
-
- return(ret);
-}
-
-/*
- - regbranch - one alternative of an | operator
- *
- * Implements the concatenation operator: emits a BRANCH node, then
- * compiles pieces until end of pattern, '|' or ')' and chains each piece
- * onto the one before it.  An empty alternative gets a NOTHING node.
- *
- * flagp - out: HASWIDTH if any piece has width; SPSTART if the first
- *         piece reports it.
- *
- * Returns the BRANCH node, or NULL if a piece fails to compile.
- */
-static char *
-regbranch(cp, flagp)
-register struct comp *cp;
-int *flagp;
-{
-    register char *head;
-    register char *prev = NULL;
-    register char *piece;
-    int pflags;
-    register int ch;
-
-    *flagp = WORST;    /* Tentatively. */
-    head = regnode(cp, BRANCH);
-
-    for (;;) {
-        ch = *cp->regparse;
-        if (ch == '\0' || ch == '|' || ch == ')')
-            break;
-
-        piece = regpiece(cp, &pflags);
-        if (piece == NULL)
-            return(NULL);
-
-        *flagp |= pflags&HASWIDTH;
-        if (prev != NULL)
-            regtail(cp, prev, piece);
-        else    /* First piece. */
-            *flagp |= pflags&SPSTART;
-        prev = piece;
-    }
-
-    if (prev == NULL)    /* No pieces at all. */
-        (void) regnode(cp, NOTHING);
-
-    return(head);
-}
-
-/*
- - regpiece - something followed by possible [*+?]
- *
- * Note that the branching code sequences used for ? and the general cases
- * of * and + are somewhat optimized: they use the same NOTHING node as
- * both the endmarker for their branch list and the body of the last branch.
- * It might seem that this node could be dispensed with entirely, but the
- * endmarker role is not redundant.
- *
- * flagp - out: the atom's own flags when no repetition operator follows;
- *         otherwise WORST plus SPSTART for '*' and '+', plus HASWIDTH
- *         for '+'.
- *
- * Returns the start of the piece (after reginsert() that is the inserted
- * operator node, which takes over the atom's old address).  Returns NULL
- * if the atom fails to compile.
- */
-static char *
-regpiece(cp, flagp)
-register struct comp *cp;
-int *flagp;
-{
- register char *ret;
- register char op;
- register char *next;
- int flags;
-
- ret = regatom(cp, &flags);
- if (ret == NULL)
- return(NULL);
-
- op = *cp->regparse;
- if (!ISREPN(op)) {
- *flagp = flags;
- return(ret);
- }
-
- /* Only '?' may be applied to an atom that can match the empty string. */
- if (!(flags&HASWIDTH) && op != '?')
- FAIL("*+ operand could be empty");
- switch (op) {
- case '*': *flagp = WORST|SPSTART; break;
- case '+': *flagp = WORST|SPSTART|HASWIDTH; break;
- case '?': *flagp = WORST; break;
- }
-
- if (op == '*' && (flags&SIMPLE))
- reginsert(cp, STAR, ret);
- else if (op == '*') {
- /* Emit x* as (x&|), where & means "self". */
- reginsert(cp, BRANCH, ret); /* Either x */
- regoptail(cp, ret, regnode(cp, BACK)); /* and loop */
- regoptail(cp, ret, ret); /* back */
- regtail(cp, ret, regnode(cp, BRANCH)); /* or */
- regtail(cp, ret, regnode(cp, NOTHING)); /* null. */
- } else if (op == '+' && (flags&SIMPLE))
- reginsert(cp, PLUS, ret);
- else if (op == '+') {
- /* Emit x+ as x(&|), where & means "self". */
- next = regnode(cp, BRANCH); /* Either */
- regtail(cp, ret, next);
- regtail(cp, regnode(cp, BACK), ret); /* loop back */
- regtail(cp, next, regnode(cp, BRANCH)); /* or */
- regtail(cp, ret, regnode(cp, NOTHING)); /* null. */
- } else if (op == '?') {
- /* Emit x? as (x|) */
- reginsert(cp, BRANCH, ret); /* Either x */
- regtail(cp, ret, regnode(cp, BRANCH)); /* or */
- next = regnode(cp, NOTHING); /* null. */
- regtail(cp, ret, next);
- regoptail(cp, ret, next);
- }
- /* Step over the operator; a second one directly after it is an error. */
- cp->regparse++;
- if (ISREPN(*cp->regparse))
- FAIL("nested *?+");
-
- return(ret);
-}
-
-/*
- - regatom - the lowest level
- *
- * Optimization: gobbles an entire sequence of ordinary characters so that
- * it can turn them into a single node, which is smaller to store and
- * faster to run. Backslashed characters are exceptions, each becoming a
- * separate node; the code is simpler that way and it's not worth fixing.
- *
- * flagp - out: starts as WORST; HASWIDTH and SIMPLE are added for '.',
- *         bracket sets, escaped characters and one-character literals;
- *         HASWIDTH alone for longer literals; a ( ) group passes on the
- *         HASWIDTH and SPSTART bits of its contents.
- *
- * Returns the node emitted for the atom, or NULL if a nested group fails
- * to compile.
- */
-static char *
-regatom(cp, flagp)
-register struct comp *cp;
-int *flagp;
-{
- register char *ret;
- int flags;
-
- *flagp = WORST; /* Tentatively. */
-
- switch (*cp->regparse++) {
- case '^':
- ret = regnode(cp, BOL);
- break;
- case '$':
- ret = regnode(cp, EOL);
- break;
- case '.':
- ret = regnode(cp, ANY);
- *flagp |= HASWIDTH|SIMPLE;
- break;
- case '[': {
- register int range;
- register int rangeend;
- register int c;
-
- if (*cp->regparse == '^') { /* Complement of range. */
- ret = regnode(cp, ANYBUT);
- cp->regparse++;
- } else
- ret = regnode(cp, ANYOF);
- /* A leading ']' or '-' is an ordinary member of the set. */
- if ((c = *cp->regparse) == ']' || c == '-') {
- regc(cp, c);
- cp->regparse++;
- }
- while ((c = *cp->regparse++) != '\0' && c != ']') {
- if (c != '-')
- regc(cp, c);
- else if ((c = *cp->regparse) == ']' || c == '\0')
- regc(cp, '-');
- else {
- /* regparse-1 is the '-', so regparse-2 is the range's low end. */
- range = (unsigned char)*(cp->regparse-2);
- rangeend = (unsigned char)c;
- if (range > rangeend)
- FAIL("invalid [] range");
- /* The low end was already emitted, hence range++ first. */
- for (range++; range <= rangeend; range++)
- regc(cp, range);
- cp->regparse++;
- }
- }
- regc(cp, '\0');
- if (c != ']')
- FAIL("unmatched []");
- *flagp |= HASWIDTH|SIMPLE;
- break;
- }
- case '(':
- ret = reg(cp, 1, &flags);
- if (ret == NULL)
- return(NULL);
- *flagp |= flags&(HASWIDTH|SPSTART);
- break;
- case '\0':
- case '|':
- case ')':
- /* supposed to be caught earlier */
- FAIL("internal error: \\0|) unexpected");
- /*NOTREACHED*/
- break;
- case '?':
- case '+':
- case '*':
- FAIL("?+* follows nothing");
- /*NOTREACHED*/
- break;
- case '\\':
- if (*cp->regparse == '\0')
- FAIL("trailing \\");
- ret = regnode(cp, EXACTLY);
- regc(cp, *cp->regparse++);
- regc(cp, '\0');
- *flagp |= HASWIDTH|SIMPLE;
- break;
- default: {
- register size_t len;
- register char ender;
-
- /* Un-consume the character the switch read; it starts the literal. */
- cp->regparse--;
- len = strcspn(cp->regparse, META);
- if (len == 0)
- FAIL("internal error: strcspn 0");
- ender = *(cp->regparse+len);
- if (len > 1 && ISREPN(ender))
- len--; /* Back off clear of ?+* operand. */
- *flagp |= HASWIDTH;
- if (len == 1)
- *flagp |= SIMPLE;
- ret = regnode(cp, EXACTLY);
- for (; len > 0; len--)
- regc(cp, *cp->regparse++);
- regc(cp, '\0');
- break;
- }
- }
-
- return(ret);
-}
-
-/*
- - regnode - emit a node
- *
- * A node is three bytes: the opcode followed by a two-byte "next" field,
- * written here as zero.  When not emitting, only cp->regsize is bumped.
- *
- * Returns cp->regcode as it was on entry (where the node is, or would
- * have been, written).
- */
-static char *    /* Location. */
-regnode(cp, op)
-register struct comp *cp;
-char op;
-{
-    register char *const node = cp->regcode;
-
-    if (EMITTING(cp)) {
-        node[0] = op;
-        node[1] = '\0';    /* Null next pointer. */
-        node[2] = '\0';
-        cp->regcode = node + 3;
-    } else {
-        cp->regsize += 3;
-    }
-
-    return(node);
-}
-
-/*
- - regc - emit (if appropriate) a byte of code
- *
- * When emitting, stores b at cp->regcode and advances it; otherwise just
- * counts the byte in cp->regsize.
- */
-static void
-regc(cp, b)
-register struct comp *cp;
-char b;
-{
-    if (!EMITTING(cp)) {
-        cp->regsize++;
-        return;
-    }
-
-    *cp->regcode = b;
-    cp->regcode++;
-}
-
-/*
- - reginsert - insert an operator in front of already-emitted operand
- *
- * Means relocating the operand: everything from opnd up to cp->regcode is
- * shifted up by three bytes and a fresh node (op, null next pointer) is
- * written where the operand used to start.  When not emitting, only the
- * three extra bytes are counted.
- */
-static void
-reginsert(cp, op, opnd)
-register struct comp *cp;
-char op;
-char *opnd;
-{
-    if (EMITTING(cp)) {
-        const size_t tail = (size_t)(cp->regcode - opnd);
-
-        (void) memmove(opnd+3, opnd, tail);
-        cp->regcode += 3;
-
-        /* Op node, where operand used to be. */
-        opnd[0] = op;
-        opnd[1] = '\0';
-        opnd[2] = '\0';
-    } else {
-        cp->regsize += 3;
-    }
-}
-
-/*
- - regtail - set the next-pointer at the end of a node chain
- *
- * Follows the chain starting at p to its last node and stores the distance
- * to val in that node's two-byte next field.  For a BACK node the distance
- * is measured backwards.  Does nothing when not emitting.
- */
-static void
-regtail(cp, p, val)
-register struct comp *cp;
-char *p;
-char *val;
-{
-    register char *last;
-    register char *following;
-    register int offset;
-
-    if (!EMITTING(cp))
-        return;
-
-    /* Find last node. */
-    last = p;
-    while ((following = regnext(last)) != NULL)
-        last = following;
-
-    if (OP(last) == BACK)
-        offset = last - val;
-    else
-        offset = val - last;
-
-    last[1] = (offset>>8)&0177;
-    last[2] = offset&0377;
-}
-
-/*
- - regoptail - regtail on operand of first argument; nop if operandless
- *
- * Only BRANCH nodes are acted on ("operandless" and "op != BRANCH" are
- * synonymous in practice), and only while emitting.
- */
-static void
-regoptail(cp, p, val)
-register struct comp *cp;
-char *p;
-char *val;
-{
-    if (EMITTING(cp) && OP(p) == BRANCH)
-        regtail(cp, OPERAND(p), val);
-}
-
-/*
- * sqd_regexec and friends
- */
-
-/*
- * Work-variable struct for sqd_regexec().
- *
- * sqd_regexec() sets regbol, regstartp and regendp once per call (the
- * latter two point at the program's startp/endp arrays); regtry() resets
- * reginput for each starting position it attempts.
- */
-struct exec {
- char *reginput; /* String-input pointer. */
- char *regbol; /* Beginning of input, for ^ check. */
- char **regstartp; /* Pointer to startp array. */
- char **regendp; /* Ditto for endp. */
-};
-
-/*
- * Forwards.
- */
-static int regtry(struct exec *ep, sqd_regexp *rp, char *string);
-static int regmatch(struct exec *ep, char *prog);
-static size_t regrepeat(struct exec *ep, char *node);
-
-#ifdef DEBUG
-int regnarrate = 0;
-void regdump();
-static char *regprop();
-#endif
-
-/*
- - sqd_regexec - match a regexp against a string
- *
- * prog - program produced by sqd_regcomp().
- * str  - NUL-terminated subject string; it is only read.
- *
- * Returns 1 if the program matches somewhere in str, 0 otherwise.  On a
- * match, regtry() has stored the bounds of the whole match in
- * prog->startp[0] and prog->endp[0].  A NULL argument or a program without
- * the magic byte is reported through sqd_regerror().
- */
-int
-sqd_regexec(prog, str)
-register sqd_regexp *prog;
-const char *str;
-{
-    register char *string = (char *)str;    /* avert const poisoning */
-    register char *s;
-    struct exec ex;
-
-    /* Be paranoid. */
-    if (prog == NULL || string == NULL) {
-        sqd_regerror("NULL argument to sqd_regexec");
-        return(0);
-    }
-
-    /* Check validity of program. */
-    if ((unsigned char)*prog->program != SQD_REGMAGIC) {
-        sqd_regerror("corrupted regexp");
-        return(0);
-    }
-
-    /* If there is a "must appear" string, look for it. */
-    if (prog->regmust != NULL && strstr(string, prog->regmust) == NULL)
-        return(0);
-
-    /* Mark beginning of line for ^ . */
-    ex.regbol = string;
-    ex.regstartp = prog->startp;
-    ex.regendp = prog->endp;
-
-    /* Simplest case:  anchored match need be tried only once. */
-    if (prog->reganch)
-        return(regtry(&ex, prog, string));
-
-    /* Messy cases:  unanchored match. */
-    if (prog->regstart != '\0') {
-        /*
-         * We know what char it must start with, so only try positions
-         * holding that char.  Starting from strchr(string, ...) rather
-         * than from string itself matters for an empty subject: there
-         * s+1 would already point past the terminating NUL, and the
-         * follow-up strchr(s+1, ...) would read out of bounds.  Every s
-         * produced here points at a non-NUL char, so s+1 is always
-         * inside the string.
-         */
-        for (s = strchr(string, prog->regstart); s != NULL;
-             s = strchr(s+1, prog->regstart))
-            if (regtry(&ex, prog, s))
-                return(1);
-        return(0);
-    }
-
-    /* We don't -- general case.  The position of the NUL is tried too. */
-    for (s = string; !regtry(&ex, prog, s); s++)
-        if (*s == '\0')
-            return(0);
-    return(1);
-}
-
-/*
- - regtry - try match at specific point
- *
- * Clears all NSUBEXP start/end slots of prog, then runs the matcher from
- * string.  On success slot 0 receives the bounds of the whole match.
- */
-static int    /* 0 failure, 1 success */
-regtry(ep, prog, string)
-register struct exec *ep;
-sqd_regexp *prog;
-char *string;
-{
-    register int i;
-
-    ep->reginput = string;
-
-    for (i = 0; i < NSUBEXP; i++) {
-        prog->startp[i] = NULL;
-        prog->endp[i] = NULL;
-    }
-
-    if (!regmatch(ep, prog->program + 1))
-        return(0);
-
-    prog->startp[0] = string;
-    prog->endp[0] = ep->reginput;
-    return(1);
-}
-
-/*
- - regmatch - main matching routine
- *
- * Conceptually the strategy is simple: check to see whether the current
- * node matches, call self recursively to see whether the rest matches,
- * and then act accordingly. In practice we make some effort to avoid
- * recursion, in particular by going through "ordinary" nodes (that don't
- * need to know whether the rest of the match failed) by a loop instead of
- * by recursion.
- *
- * ep   - match state; ep->reginput is advanced over matched input.
- * prog - node at which to start matching.
- *
- * Returns 1 when the END node is reached, 0 on any mismatch.  An unknown
- * opcode or a chain that runs out without END is reported through
- * sqd_regerror().
- */
-static int /* 0 failure, 1 success */
-regmatch(ep, prog)
-register struct exec *ep;
-char *prog;
-{
- register char *scan; /* Current node. */
- char *next; /* Next node. */
-
-#ifdef DEBUG
- if (prog != NULL && regnarrate)
- fprintf(stderr, "%s(\n", regprop(prog));
-#endif
- for (scan = prog; scan != NULL; scan = next) {
-#ifdef DEBUG
- if (regnarrate)
- fprintf(stderr, "%s...\n", regprop(scan));
-#endif
- next = regnext(scan);
-
- switch (OP(scan)) {
- case BOL:
- if (ep->reginput != ep->regbol)
- return(0);
- break;
- case EOL:
- if (*ep->reginput != '\0')
- return(0);
- break;
- case ANY:
- if (*ep->reginput == '\0')
- return(0);
- ep->reginput++;
- break;
- case EXACTLY: {
- register size_t len;
- register char *const opnd = OPERAND(scan);
-
- /* Inline the first character, for speed. */
- if (*opnd != *ep->reginput)
- return(0);
- len = strlen(opnd);
- if (len > 1 && strncmp(opnd, ep->reginput, len) != 0)
- return(0);
- ep->reginput += len;
- break;
- }
- case ANYOF:
- if (*ep->reginput == '\0' ||
- strchr(OPERAND(scan), *ep->reginput) == NULL)
- return(0);
- ep->reginput++;
- break;
- case ANYBUT:
- if (*ep->reginput == '\0' ||
- strchr(OPERAND(scan), *ep->reginput) != NULL)
- return(0);
- ep->reginput++;
- break;
- case NOTHING:
- break;
- case BACK:
- break;
- case OPEN+1: case OPEN+2: case OPEN+3:
- case OPEN+4: case OPEN+5: case OPEN+6:
- case OPEN+7: case OPEN+8: case OPEN+9: {
- register const int no = OP(scan) - OPEN;
- register char *const input = ep->reginput;
-
- /* The slot is filled only after the rest has matched. */
- if (regmatch(ep, next)) {
- /*
- * Don't set startp if some later
- * invocation of the same parentheses
- * already has.
- */
- if (ep->regstartp[no] == NULL)
- ep->regstartp[no] = input;
- return(1);
- } else
- return(0);
- /*NOTREACHED*/
- break;
- }
- case CLOSE+1: case CLOSE+2: case CLOSE+3:
- case CLOSE+4: case CLOSE+5: case CLOSE+6:
- case CLOSE+7: case CLOSE+8: case CLOSE+9: {
- register const int no = OP(scan) - CLOSE;
- register char *const input = ep->reginput;
-
- if (regmatch(ep, next)) {
- /*
- * Don't set endp if some later
- * invocation of the same parentheses
- * already has.
- */
- if (ep->regendp[no] == NULL)
- ep->regendp[no] = input;
- return(1);
- } else
- return(0);
- /*NOTREACHED*/
- break;
- }
- case BRANCH: {
- register char *const save = ep->reginput;
-
- if (OP(next) != BRANCH) /* No choice. */
- next = OPERAND(scan); /* Avoid recursion. */
- else {
- /* Try each alternative in turn, rewinding the input between tries. */
- while (OP(scan) == BRANCH) {
- if (regmatch(ep, OPERAND(scan)))
- return(1);
- ep->reginput = save;
- scan = regnext(scan);
- }
- return(0);
- /*NOTREACHED*/
- }
- break;
- }
- case STAR: case PLUS: {
- register const char nextch =
- (OP(next) == EXACTLY) ? *OPERAND(next) : '\0';
- register size_t no;
- register char *const save = ep->reginput;
- register const size_t min = (OP(scan) == STAR) ? 0 : 1;
-
- /*
- * regrepeat() gives the largest repeat count; try that first and
- * back off one repetition at a time down to min.  no is kept one
- * above the count being tried so the unsigned loop test is safe.
- */
- for (no = regrepeat(ep, OPERAND(scan)) + 1; no > min; no--) {
- ep->reginput = save + no - 1;
- /* If it could work, try it. */
- if (nextch == '\0' || *ep->reginput == nextch)
- if (regmatch(ep, next))
- return(1);
- }
- return(0);
- /*NOTREACHED*/
- break;
- }
- case END:
- return(1); /* Success! */
- break;
- default:
- sqd_regerror("regexp corruption");
- return(0);
- /*NOTREACHED*/
- break;
- }
- }
-
- /*
- * We get here only if there's trouble -- normally "case END" is
- * the terminating point.
- */
- sqd_regerror("corrupted pointers");
- return(0);
-}
-
-/*
- - regrepeat - report how many times something simple would match
- *
- * node must be an ANY, EXACTLY, ANYOF or ANYBUT node; the count is how
- * many consecutive characters at ep->reginput it would accept.  For
- * EXACTLY only the operand's first character is compared.  The input
- * pointer itself is not moved.
- */
-static size_t
-regrepeat(ep, node)
-register struct exec *ep;
-char *node;
-{
-    switch (OP(node)) {
-    case ANY:
-        return(strlen(ep->reginput));
-    case EXACTLY: {
-        register const char ch = *OPERAND(node);
-        register char *scan = ep->reginput;
-
-        while (*scan == ch)
-            scan++;
-        return((size_t)(scan - ep->reginput));
-    }
-    case ANYOF:
-        return(strspn(ep->reginput, OPERAND(node)));
-    case ANYBUT:
-        return(strcspn(ep->reginput, OPERAND(node)));
-    default:    /* Oh dear.  Called inappropriately. */
-        sqd_regerror("internal error: bad call of regrepeat");
-        return(0);    /* Best compromise. */
-    }
-    /* NOTREACHED */
-}
-
-/*
- - regnext - dig the "next" pointer out of a node
- *
- * A zero offset means "no next node" and yields NULL.  BACK nodes store a
- * backward distance; every other node stores a forward one.
- */
-static char *
-regnext(p)
-register char *p;
-{
-    register const int offset = NEXT(p);
-
-    if (offset == 0)
-        return(NULL);
-    if (OP(p) == BACK)
-        return(p - offset);
-    return(p + offset);
-}
-
-#ifdef DEBUG
-
-static char *regprop();
-
-/*
- - regdump - dump a regexp onto stdout in vaguely comprehensible form
- *
- * Prints one line per node: the node's offset in r->program, its opcode
- * name, the offset of its "next" node in parentheses (0 if none), and the
- * operand text for ANYOF, ANYBUT and EXACTLY nodes.  The start character,
- * anchoring and must-have string from the header are printed last.
- * Only compiled when DEBUG is defined.
- */
-void
-regdump(r)
-sqd_regexp *r;
-{
-    register char *s;
-    register char op = EXACTLY;    /* Arbitrary non-END op. */
-    register char *next;
-
-    s = r->program + 1;
-    while (op != END) {    /* While that wasn't END last time... */
-        op = OP(s);
-        /*
-         * Pointer differences have type ptrdiff_t; passing one to %d is
-         * undefined where ptrdiff_t is wider than int, so convert first.
-         */
-        printf("%2d%s", (int)(s-r->program), regprop(s));    /* Where, what. */
-        next = regnext(s);
-        if (next == NULL)    /* Next ptr. */
-            printf("(0)");
-        else
-            printf("(%d)", (int)((s-r->program)+(next-s)));
-        s += 3;
-        if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
-            /* Literal string, where present. */
-            while (*s != '\0') {
-                putchar(*s);
-                s++;
-            }
-            s++;
-        }
-        putchar('\n');
-    }
-
-    /* Header fields of interest. */
-    if (r->regstart != '\0')
-        printf("start `%c' ", r->regstart);
-    if (r->reganch)
-        printf("anchored ");
-    if (r->regmust != NULL)
-        printf("must have \"%s\"", r->regmust);
-    printf("\n");
-}
-
-/*
- - regprop - printable representation of opcode
- *
- * Returns a pointer to a static buffer holding ":" followed by the name
- * of the opcode of node op (with the group number appended for OPEN and
- * CLOSE).  The buffer is overwritten by the next call.  An unknown opcode
- * is reported through sqd_regerror().
- */
-static char *
-regprop(op)
-char *op;
-{
-    static char buf[50];
-    register char *name = NULL;
-
-    (void) strcpy(buf, ":");
-
-    switch (OP(op)) {
-    case BOL:     name = "BOL";     break;
-    case EOL:     name = "EOL";     break;
-    case ANY:     name = "ANY";     break;
-    case ANYOF:   name = "ANYOF";   break;
-    case ANYBUT:  name = "ANYBUT";  break;
-    case BRANCH:  name = "BRANCH";  break;
-    case EXACTLY: name = "EXACTLY"; break;
-    case NOTHING: name = "NOTHING"; break;
-    case BACK:    name = "BACK";    break;
-    case END:     name = "END";     break;
-    case STAR:    name = "STAR";    break;
-    case PLUS:    name = "PLUS";    break;
-    case OPEN+1: case OPEN+2: case OPEN+3:
-    case OPEN+4: case OPEN+5: case OPEN+6:
-    case OPEN+7: case OPEN+8: case OPEN+9:
-        /* Numbered name is formatted straight after the ':'. */
-        sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
-        break;
-    case CLOSE+1: case CLOSE+2: case CLOSE+3:
-    case CLOSE+4: case CLOSE+5: case CLOSE+6:
-    case CLOSE+7: case CLOSE+8: case CLOSE+9:
-        sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
-        break;
-    default:
-        sqd_regerror("corrupted opcode");
-        break;
-    }
-
-    if (name != NULL)
-        (void) strcat(buf, name);
-    return(buf);
-}
-#endif
-
-
-/*
- - sqd_regsub - perform substitutions after a regexp match
- *
- * rp     - program whose startp/endp slots describe the latest match.
- * source - template: '&' stands for the whole match and \0 .. \9 for the
- *          numbered sub-matches; "\\" and "\&" yield a literal backslash
- *          or ampersand.  Any other character is copied unchanged.
- * dest   - output buffer.  No length is passed in and none is checked, so
- *          the caller must make it large enough for the expanded result.
- *
- * A sub-match that is unset or empty contributes nothing.  NULL arguments,
- * a program without the magic byte, or a NUL inside a recorded match are
- * reported through sqd_regerror().
- */
-void
-sqd_regsub(rp, source, dest)
-const sqd_regexp *rp;
-const char *source;
-char *dest;
-{
-    register sqd_regexp * const prog = (sqd_regexp *)rp;
-    register char *src = (char *)source;
-    register char *dst = dest;
-    register char c;
-    register int no;
-    register size_t len;
-
-    if (prog == NULL || source == NULL || dest == NULL) {
-        sqd_regerror("NULL parameter to sqd_regsub");
-        return;
-    }
-    if ((unsigned char)*(prog->program) != SQD_REGMAGIC) {
-        sqd_regerror("damaged regexp");
-        return;
-    }
-
-    while ((c = *src++) != '\0') {
-        /*
-         * isdigit() is only defined for EOF and unsigned char values; a
-         * plain char holding a byte >= 0x80 may be negative, so convert
-         * through unsigned char instead of casting straight to int.
-         */
-        if (c == '&')
-            no = 0;
-        else if (c == '\\' && isdigit((unsigned char)*src))
-            no = *src++ - '0';
-        else
-            no = -1;
-
-        if (no < 0) {    /* Ordinary character. */
-            if (c == '\\' && (*src == '\\' || *src == '&'))
-                c = *src++;
-            *dst++ = c;
-        } else if (prog->startp[no] != NULL && prog->endp[no] != NULL &&
-                   prog->endp[no] > prog->startp[no]) {
-            len = prog->endp[no] - prog->startp[no];
-            (void) strncpy(dst, prog->startp[no], len);
-            dst += len;
-            if (*(dst-1) == '\0') {    /* strncpy hit NUL. */
-                sqd_regerror("damaged match string");
-                return;
-            }
-        }
-    }
-    *dst++ = '\0';
-}
-
-
-/*
- - sqd_regerror - report a regexp error and terminate
- *
- * Writes "regexp(3): " followed by s and a newline to stderr, then exits
- * with EXIT_FAILURE; it does not return to the caller.
- */
-void
-sqd_regerror(s)
-char *s;
-{
-    (void) fprintf(stderr, "regexp(3): %s\n", s);
-    exit(EXIT_FAILURE);
-    /* NOTREACHED */
-}
-
-#ifdef NBA_TEAM_IN_STL
-/*
- * Test driver, built only when NBA_TEAM_IN_STL is defined.
- *
- * Arguments: <pattern> <ntok> <string>.  Runs Strparse() on them and
- * prints the matched tokens 1..ntok from sqd_parse[].
- * NOTE(review): ntok is not checked against the capacity of sqd_parse[],
- * which is declared outside this file -- confirm the limit there.
- *
- * Returns EXIT_FAILURE when too few arguments are given, EXIT_SUCCESS
- * otherwise (whether or not the pattern matched).
- */
-int
-main(int argc, char **argv)
-{
-    char *pat;
-    int   ntok;
-    char *s;
-    int   status;
-
-    /* argv[1..3] are read below; refuse to run without them. */
-    if (argc < 4) {
-        fprintf(stderr, "usage: <prog> <pattern> <ntok> <string>\n");
-        return(EXIT_FAILURE);
-    }
-
-    pat  = argv[1];
-    ntok = atoi(argv[2]);
-    s    = argv[3];
-
-    status = Strparse(pat, s, ntok);
-    if (status == 0) {
-        printf("no match\n");
-    } else {
-        int i;
-        printf("MATCH.\n");
-        for (i = 1; i <= ntok; i++)
-            printf("matched token %1d: %s\n", i, sqd_parse[i]);
-    }
-    return(EXIT_SUCCESS);
-}
-#endif /*NBA_TEAM_IN_STL*/
diff --git a/squid/install-sh b/squid/install-sh
deleted file mode 100644
index e9de238..0000000
--- a/squid/install-sh
+++ /dev/null
@@ -1,251 +0,0 @@
-#!/bin/sh
-#
-# install - install a program, script, or datafile
-# This comes from X11R5 (mit/util/scripts/install.sh).
-#
-# Copyright 1991 by the Massachusetts Institute of Technology
-#
-# Permission to use, copy, modify, distribute, and sell this software and its
-# documentation for any purpose is hereby granted without fee, provided that
-# the above copyright notice appear in all copies and that both that
-# copyright notice and this permission notice appear in supporting
-# documentation, and that the name of M.I.T. not be used in advertising or
-# publicity pertaining to distribution of the software without specific,
-# written prior permission. M.I.T. makes no representations about the
-# suitability of this software for any purpose. It is provided "as is"
-# without express or implied warranty.
-#
-# Calling this script install-sh is preferred over install.sh, to prevent
-# `make' implicit rules from creating a file called install from it
-# when there is no Makefile.
-#
-# This script is compatible with the BSD install script, but was written
-# from scratch. It can only install one file at a time, a restriction
-# shared with many OS's install programs.
-
-
-# set DOITPROG to echo to test this script
-
-# Don't use :- since 4.3BSD and earlier shells don't like it.
-doit="${DOITPROG-}"
-
-
-# put in absolute paths if you don't have them in your path; or use env. vars.
-
-mvprog="${MVPROG-mv}"
-cpprog="${CPPROG-cp}"
-chmodprog="${CHMODPROG-chmod}"
-chownprog="${CHOWNPROG-chown}"
-chgrpprog="${CHGRPPROG-chgrp}"
-stripprog="${STRIPPROG-strip}"
-rmprog="${RMPROG-rm}"
-mkdirprog="${MKDIRPROG-mkdir}"
-
-# Defaults for everything the option loop may set.  The sed expression of
-# the -t= option is kept in $transformarg, which is the name the rest of
-# the script reads; it must be cleared here so that a value inherited from
-# the environment cannot switch on renaming by accident.
-transformbasename=""
-transformarg=""
-instcmd="$mvprog"
-chmodcmd="$chmodprog 0755"
-chowncmd=""
-chgrpcmd=""
-stripcmd=""
-rmcmd="$rmprog -f"
-mvcmd="$mvprog"
-src=""
-dst=""
-dir_arg=""
-
-# Parse the command line one word at a time.  The first non-option word
-# becomes $src; every later one overwrites $dst.  Because the loop test is
-# x"$1" != x, an empty argument ends parsing just like running out of words.
-while [ x"$1" != x ]; do
- case $1 in
- # -c: copy instead of move.
- -c) instcmd="$cpprog"
- shift
- continue;;
-
- # -d: the single operand is a directory to create.
- -d) dir_arg=true
- shift
- continue;;
-
- # -m, -o, -g take their value from the following word.
- -m) chmodcmd="$chmodprog $2"
- shift
- shift
- continue;;
-
- -o) chowncmd="$chownprog $2"
- shift
- shift
- continue;;
-
- -g) chgrpcmd="$chgrpprog $2"
- shift
- shift
- continue;;
-
- -s) stripcmd="$stripprog"
- shift
- continue;;
-
- # -t=EXPR: sed expression applied to the destination base name.
- -t=*) transformarg=`echo $1 | sed 's/-t=//'`
- shift
- continue;;
-
- # -b=SUFFIX: suffix stripped before, and re-appended after, the transform.
- -b=*) transformbasename=`echo $1 | sed 's/-b=//'`
- shift
- continue;;
-
- *) if [ x"$src" = x ]
- then
- src=$1
- else
- # this colon is to work around a 386BSD /bin/sh bug
- :
- dst=$1
- fi
- shift
- continue;;
- esac
-done
-
-# A first operand (the source, or with -d the directory name) is mandatory.
-if [ x"$src" = x ]
-then
-    echo "install: no input file specified"
-    exit 1
-else
-    true
-fi
-
-if [ x"$dir_arg" != x ]; then
-    # -d mode: the operand names the directory to create, not a source.
-    dst=$src
-    src=""
-
-    if [ -d $dst ]; then
-        # Already there: nothing to create, and leave its mode alone.
-        instcmd=:
-        chmodcmd=""
-    else
-        # Use $mkdirprog, not a literal mkdir, so that the MKDIRPROG
-        # override is honoured here exactly as it is when the parent
-        # directories are created further down.
-        instcmd=$mkdirprog
-    fi
-else
-
-# Waiting for this to be detected by the "$instcmd $src $dsttmp" command
-# might cause directories to be created, which would be especially bad
-# if $src (and thus $dsttmp) contains '*'.
-
-    if [ -f $src -o -d $src ]
-    then
-        true
-    else
-        echo "install: $src does not exist"
-        exit 1
-    fi
-
-    if [ x"$dst" = x ]
-    then
-        echo "install: no destination specified"
-        exit 1
-    else
-        true
-    fi
-
-# If destination is a directory, append the input filename; if your system
-# does not like double slashes in filenames, you may need to add some logic
-
-    if [ -d $dst ]
-    then
-        dst="$dst"/`basename $src`
-    else
-        true
-    fi
-fi
-
-## this sed command emulates the dirname command
-## (strip the last path component, then a trailing '/'; empty becomes '.')
-dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
-
-# Make sure that the destination directory exists.
-# this part is taken from Noah Friedman's mkinstalldirs script
-
-# Skip lots of stat calls in the usual case.
-if [ ! -d "$dstdir" ]; then
-defaultIFS='
-'
-IFS="${IFS-${defaultIFS}}"
-
-oIFS="${IFS}"
-# Some sh's can't handle IFS=/ for some reason.
-# So every '/' is turned into '%' and the path is split on '%' instead;
-# a leading '%' is turned back into '/' so an absolute path stays absolute.
-IFS='%'
-set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'`
-IFS="${oIFS}"
-
-pathcomp=''
-
-# Walk the components left to right, creating each prefix that is missing.
-while [ $# -ne 0 ] ; do
- pathcomp="${pathcomp}${1}"
- shift
-
- if [ ! -d "${pathcomp}" ] ;
- then
- $mkdirprog "${pathcomp}"
- else
- true
- fi
-
- pathcomp="${pathcomp}/"
-done
-fi
-
-# -d mode: run the command chosen earlier for the directory, then apply
-# whichever of chown/chgrp/strip/chmod were requested.
-if [ x"$dir_arg" != x ]
-then
- $doit $instcmd $dst &&
-
- if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi
-else
-
-# If we're going to rename the final executable, determine the name now.
-
- if [ x"$transformarg" = x ]
- then
- dstfile=`basename $dst`
- else
- dstfile=`basename $dst $transformbasename |
- sed $transformarg`$transformbasename
- fi
-
-# don't allow the sed command to completely eliminate the filename
-
- if [ x"$dstfile" = x ]
- then
- dstfile=`basename $dst`
- else
- true
- fi
-
-# Make a temp file name in the proper directory.
-# ($$ is the shell's process id, so concurrent runs use different names.)
-
- dsttmp=$dstdir/#inst.$$#
-
-# Move or copy the file name to the temp name
-
- $doit $instcmd $src $dsttmp &&
-
-# From here on, remove the temp file whenever the script exits.
-
- trap "rm -f ${dsttmp}" 0 &&
-
-# and set any options; do chmod last to preserve setuid bits
-
-# If any of these fail, we abort the whole thing. If we want to
-# ignore errors from any of these, just make sure not to ignore
-# errors from the above "$doit $instcmd $src $dsttmp" command.
-
- if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi &&
- if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi &&
- if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi &&
- if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi &&
-
-# Now rename the file to the real destination.
-# (The old file is removed first, then the temp file is moved into place.)
-
- $doit $rmcmd -f $dstdir/$dstfile &&
- $doit $mvcmd $dsttmp $dstdir/$dstfile
-
-fi &&
-
-
-exit 0
diff --git a/squid/iupac.c b/squid/iupac.c
deleted file mode 100644
index 01f1dd3..0000000
--- a/squid/iupac.c
+++ /dev/null
@@ -1,221 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* iupac.c
- *
- * Globally defines the IUPAC symbols for nucleic acid sequence
- * Slowly evolving into a repository of globals. Tue Apr 20 1993
- *
- * CVS $Id: iupac.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- */
-#include "squidconf.h"
-#include "squid.h"
-
-/* Default expected nucleotide occurrence frequencies, A/C/G/T.
- * Used (for instance) as the default distribution for
- * i.i.d. random nucleotide sequences.
- */
-float dnafq[4] = { 0.25, 0.25, 0.25, 0.25 };
-
-/* Dayhoff f(i) amino acid occurrence frequencies.
- * From SwissProt 34: 21,210,388 residues
- * In alphabetic order by single-letter code.
- * Used (for instance) as the default distribution for
- * i.i.d. random protein sequences.
- */
-float aafq[20] = {
- 0.075520, /* A */
- 0.016973, /* C */
- 0.053029, /* D */
- 0.063204, /* E */
- 0.040762, /* F */
- 0.068448, /* G */
- 0.022406, /* H */
- 0.057284, /* I */
- 0.059398, /* K */
- 0.093399, /* L */
- 0.023569, /* M */
- 0.045293, /* N */
- 0.049262, /* P */
- 0.040231, /* Q */
- 0.051573, /* R */
- 0.072214, /* S */
- 0.057454, /* T */
- 0.065252, /* V */
- 0.012513, /* W */
- 0.031985 /* Y */
-};
-
-char aa_alphabet[] = AMINO_ALPHABET;
- /* aa_index converts to pam's 27x27 scheme */
- /* The 20 values are the 0-based positions in A..Z of the letters
-  * A C D E F G H I K L M N P Q R S T V W Y (A=0 ... Y=24), i.e. the
-  * residue order used by the aafq[] comments.
-  * NOTE(review): presumably AMINO_ALPHABET lists the residues in this
-  * same order -- confirm where it is defined. */
-int aa_index[20] = { 0, 2, 3, 4, 5, 6, 7, 8, 10, 11,
- 12, 13, 15, 16, 17, 18, 19, 21, 22, 24 };
-
- /* IUPAC code translations */
- /* note: sequence chars are UPPER CASE */
- /* NOTE(review): each row reads as { symbol, paired symbol, code for
-  * the symbol, code for the paired symbol }; the pairs look like base
-  * complements (A/T, C/G, R/Y, M/K, H/D, B/V).  Confirm the field
-  * names against struct iupactype, which is declared outside this file. */
-struct iupactype iupac[] = {
- { 'A', 'T', NTA, NTT, },
- { 'C', 'G', NTC, NTG, },
- { 'G', 'C', NTG, NTC, },
- { 'T', 'A', NTT, NTA, },
- { 'U', 'A', NTU, NTA, },
- { 'N', 'N', NTN, NTN, },
- { ' ', ' ', NTGAP, NTGAP, },
- { 'R', 'Y', NTR, NTY, },
- { 'Y', 'R', NTY, NTR, },
- { 'M', 'K', NTM, NTK, },
- { 'K', 'M', NTK, NTM, },
- { 'S', 'S', NTS, NTS, },
- { 'W', 'W', NTW, NTW, },
- { 'H', 'D', NTH, NTD, },
- { 'B', 'V', NTB, NTV, },
- { 'V', 'B', NTV, NTB, },
- { 'D', 'H', NTD, NTH, },
- };
-
-
-/* One-letter amino acid code for each codon.
- * As the per-entry comments show, the 64 codons are listed in
- * lexicographic order over A, C, G, U (AAA is index 0, UUU is index 63).
- * "*" is used for UAA, UAG and UGA; the extra entry at index 64 is the
- * placeholder "X" for an unknown codon.
- */
-char *stdcode1[65] = {
- "K", /* AAA */
- "N", /* AAC */
- "K", /* AAG */
- "N", /* AAU */
- "T", /* ACA */
- "T", /* ACC */
- "T", /* ACG */
- "T", /* ACU */
- "R", /* AGA */
- "S", /* AGC */
- "R", /* AGG */
- "S", /* AGU */
- "I", /* AUA */
- "I", /* AUC */
- "M", /* AUG */
- "I", /* AUU */
- "Q", /* CAA */
- "H", /* CAC */
- "Q", /* CAG */
- "H", /* CAU */
- "P", /* CCA */
- "P", /* CCC */
- "P", /* CCG */
- "P", /* CCU */
- "R", /* CGA */
- "R", /* CGC */
- "R", /* CGG */
- "R", /* CGU */
- "L", /* CUA */
- "L", /* CUC */
- "L", /* CUG */
- "L", /* CUU */
- "E", /* GAA */
- "D", /* GAC */
- "E", /* GAG */
- "D", /* GAU */
- "A", /* GCA */
- "A", /* GCC */
- "A", /* GCG */
- "A", /* GCU */
- "G", /* GGA */
- "G", /* GGC */
- "G", /* GGG */
- "G", /* GGU */
- "V", /* GUA */
- "V", /* GUC */
- "V", /* GUG */
- "V", /* GUU */
- "*", /* UAA */
- "Y", /* UAC */
- "*", /* UAG */
- "Y", /* UAU */
- "S", /* UCA */
- "S", /* UCC */
- "S", /* UCG */
- "S", /* UCU */
- "*", /* UGA */
- "C", /* UGC */
- "W", /* UGG */
- "C", /* UGU */
- "L", /* UUA */
- "F", /* UUC */
- "L", /* UUG */
- "F", /* UUU */
- "X", /* unknown */
-};
-
-
-
-
-/* Three-letter amino acid code for each codon.
- * The 64 codons are listed in lexicographic order over A, C, G, U (AAA is
- * index 0, UUU is index 63).  "***" is used for UAA, UAG and UGA; the
- * extra entry at index 64 is the placeholder "XXX" for an unknown codon.
- * UUU codes for phenylalanine, like UUC (and like "F" in the one-letter
- * table), so its entry is "Phe".
- */
-char *stdcode3[65] = {
-  "Lys",			/* AAA */
-  "Asn",			/* AAC */
-  "Lys",			/* AAG */
-  "Asn",			/* AAU */
-  "Thr",			/* ACA */
-  "Thr",			/* ACC */
-  "Thr",			/* ACG */
-  "Thr",			/* ACU */
-  "Arg",			/* AGA */
-  "Ser",			/* AGC */
-  "Arg",			/* AGG */
-  "Ser",			/* AGU */
-  "Ile",			/* AUA */
-  "Ile",			/* AUC */
-  "Met",			/* AUG */
-  "Ile",			/* AUU */
-  "Gln",			/* CAA */
-  "His",			/* CAC */
-  "Gln",			/* CAG */
-  "His",			/* CAU */
-  "Pro",			/* CCA */
-  "Pro",			/* CCC */
-  "Pro",			/* CCG */
-  "Pro",			/* CCU */
-  "Arg",			/* CGA */
-  "Arg",			/* CGC */
-  "Arg",			/* CGG */
-  "Arg",			/* CGU */
-  "Leu",			/* CUA */
-  "Leu",			/* CUC */
-  "Leu",			/* CUG */
-  "Leu",			/* CUU */
-  "Glu",			/* GAA */
-  "Asp",			/* GAC */
-  "Glu",			/* GAG */
-  "Asp",			/* GAU */
-  "Ala",			/* GCA */
-  "Ala",			/* GCC */
-  "Ala",			/* GCG */
-  "Ala",			/* GCU */
-  "Gly",			/* GGA */
-  "Gly",			/* GGC */
-  "Gly",			/* GGG */
-  "Gly",			/* GGU */
-  "Val",			/* GUA */
-  "Val",			/* GUC */
-  "Val",			/* GUG */
-  "Val",			/* GUU */
-  "***",			/* UAA */
-  "Tyr",			/* UAC */
-  "***",			/* UAG */
-  "Tyr",			/* UAU */
-  "Ser",			/* UCA */
-  "Ser",			/* UCC */
-  "Ser",			/* UCG */
-  "Ser",			/* UCU */
-  "***",			/* UGA */
-  "Cys",			/* UGC */
-  "Trp",			/* UGG */
-  "Cys",			/* UGU */
-  "Leu",			/* UUA */
-  "Phe",			/* UUC */
-  "Leu",			/* UUG */
-  "Phe",			/* UUU */
-  "XXX",			/* unknown */
-};
diff --git a/squid/msa.c b/squid/msa.c
deleted file mode 100644
index bdc8ce2..0000000
--- a/squid/msa.c
+++ /dev/null
@@ -1,1440 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* msa.c
- * SRE, Mon May 17 10:48:47 1999
- *
- * SQUID's interface for multiple sequence alignment
- * manipulation: access to the MSA object.
- *
- * CVS $Id: msa.c,v 1.20 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h" /* multiple sequence alignment object support */
-#include "gki.h" /* string indexing hashtable code */
-#include "ssi.h" /* SSI sequence file indexing code */
-
-/* Function: MSAAlloc()
- * Date: SRE, Tue May 18 10:45:47 1999 [St. Louis]
- *
- * Purpose: Allocate an MSA structure, return a pointer
- * to it.
- *
- * Designed to be used in three ways:
- * 1) We know exactly the dimensions of the alignment:
- * both nseq and alen.
- * msa = MSAAlloc(nseq, alen);
- *
- * 2) We know the number of sequences but not alen.
- * (We add sequences later.)
- * msa = MSAAlloc(nseq, 0);
- *
- * 3) We don't even know the number of sequences, so
- * we'll have to dynamically expand allocations.
- * We provide a blocksize for the allocation expansion,
- * and expand when needed.
- * msa = MSAAlloc(10, 0);
- * if (msa->nseq == msa->nseqalloc) MSAExpand(msa);
- *
- * Args: nseq - number of sequences, or nseq allocation blocksize
- * alen - length of alignment in columns, or 0
- *
- * Returns: pointer to new MSA object, w/ all values initialized.
- * Note that msa->nseq is initialized to 0, though space
- * is allocated.
- *
- * Diagnostics: "always works". Die()'s on memory allocation failure.
- *
- */
-MSA *
-MSAAlloc(int nseq, int alen)
-{
-  MSA *obj;                     /* structure under construction */
-  int  k;
-
-  obj = MallocOrDie(sizeof(MSA));
-
-  /* Per-sequence arrays: nseq slots each. */
-  obj->aseq   = MallocOrDie(sizeof(char *) * nseq);
-  obj->sqname = MallocOrDie(sizeof(char *) * nseq);
-  obj->sqlen  = MallocOrDie(sizeof(int)    * nseq);
-  obj->wgt    = MallocOrDie(sizeof(float)  * nseq);
-
-  /* Every slot starts unnamed, with zero length and weight -1.0.
-   * Row storage is only allocated when alen is nonzero.
-   */
-  k = 0;
-  while (k < nseq)
-    {
-      obj->sqname[k] = NULL;
-      obj->sqlen[k]  = 0;
-      obj->wgt[k]    = -1.0;
-
-      if (alen == 0) obj->aseq[k] = NULL;
-      else           obj->aseq[k] = MallocOrDie(sizeof(char) * (alen+1));
-      k++;
-    }
-
-  /* Dimensions: nothing is stored yet, but nseq slots are available,
-   * and nseq is also the increment recorded for later expansion.
-   */
-  obj->alen      = alen;
-  obj->nseq      = 0;
-  obj->nseqalloc = nseq;
-  obj->nseqlump  = nseq;
-
-  /* Parsed optional annotation: all absent. */
-  obj->flags   = 0;
-  obj->type    = kOtherSeq;
-  obj->name    = NULL;
-  obj->desc    = NULL;
-  obj->acc     = NULL;
-  obj->au      = NULL;
-  obj->ss_cons = NULL;
-  obj->sa_cons = NULL;
-  obj->rf      = NULL;
-  obj->sqacc   = NULL;
-  obj->sqdesc  = NULL;
-  obj->ss      = NULL;
-  obj->sslen   = NULL;
-  obj->sa      = NULL;
-  obj->salen   = NULL;
-  obj->index   = GKIInit();
-  obj->lastidx = 0;
-
-  for (k = 0; k < MSA_MAXCUTOFFS; k++)
-    {
-      obj->cutoff[k]        = 0.;
-      obj->cutoff_is_set[k] = FALSE;
-    }
-
-  /* Unparsed optional markup: comments, then #=GF, #=GS, #=GC, #=GR. */
-  obj->comment        = NULL;
-  obj->ncomment       = 0;
-  obj->alloc_ncomment = 0;
-
-  obj->gf_tag = NULL;
-  obj->gf     = NULL;
-  obj->ngf    = 0;
-
-  obj->gs_tag = NULL;
-  obj->gs     = NULL;
-  obj->gs_idx = NULL;
-  obj->ngs    = 0;
-
-  obj->gc_tag = NULL;
-  obj->gc     = NULL;
-  obj->gc_idx = NULL;
-  obj->ngc    = 0;
-
-  obj->gr_tag = NULL;
-  obj->gr     = NULL;
-  obj->gr_idx = NULL;
-  obj->ngr    = 0;
-
-  return obj;
-}
-
-/* Function: MSAExpand()
- * Date: SRE, Tue May 18 11:06:53 1999 [St. Louis]
- *
- * Purpose: Increase the sequence allocation in an MSA
- * by msa->nseqlump. (Typically used when we're reading
- * in an alignment sequentially from a file,
- * so we don't know nseq until we're done.)
- *
- * Args: msa - the MSA object
- *
- * Returns: (void)
- *
- */
-void
-MSAExpand(MSA *msa)
-{
-  int first_new;                /* index of the first slot added by this call */
-  int i,j;
-
-  first_new       = msa->nseqalloc;
-  msa->nseqalloc += msa->nseqlump;
-
-  /* Grow the required per-sequence arrays. sqlen holds ints, so it is
-   * sized with sizeof(int).
-   */
-  msa->aseq   = ReallocOrDie(msa->aseq,   sizeof(char *) * msa->nseqalloc);
-  msa->sqname = ReallocOrDie(msa->sqname, sizeof(char *) * msa->nseqalloc);
-  msa->sqlen  = ReallocOrDie(msa->sqlen,  sizeof(int)    * msa->nseqalloc);
-  msa->wgt    = ReallocOrDie(msa->wgt,    sizeof(float)  * msa->nseqalloc);
-
-  /* Grow the optional per-sequence arrays, but only those already in use. */
-  if (msa->ss != NULL) {
-    msa->ss    = ReallocOrDie(msa->ss,    sizeof(char *) * msa->nseqalloc);
-    msa->sslen = ReallocOrDie(msa->sslen, sizeof(int)    * msa->nseqalloc);
-  }
-  if (msa->sa != NULL) {
-    msa->sa    = ReallocOrDie(msa->sa,    sizeof(char *) * msa->nseqalloc);
-    msa->salen = ReallocOrDie(msa->salen, sizeof(int)    * msa->nseqalloc);
-  }
-  if (msa->sqacc != NULL)
-    msa->sqacc = ReallocOrDie(msa->sqacc, sizeof(char *) * msa->nseqalloc);
-  if (msa->sqdesc != NULL)
-    msa->sqdesc = ReallocOrDie(msa->sqdesc, sizeof(char *) * msa->nseqalloc);
-
-  /* Initialize the new slots. Their contents are indeterminate after the
-   * reallocations above, so row storage must be obtained with a fresh
-   * allocation rather than by reallocating the slot's current value.
-   */
-  for (i = first_new; i < msa->nseqalloc; i++)
-    {
-      msa->sqname[i] = NULL;
-      msa->wgt[i]    = -1.0;
-
-      if (msa->sqacc  != NULL) msa->sqacc[i]  = NULL;
-      if (msa->sqdesc != NULL) msa->sqdesc[i] = NULL;
-
-      if (msa->alen != 0)
-        msa->aseq[i] = MallocOrDie(sizeof(char) * (msa->alen+1));
-      else
-        msa->aseq[i] = NULL;
-      msa->sqlen[i] = 0;
-
-      if (msa->ss != NULL) {
-        if (msa->alen != 0)
-          msa->ss[i] = MallocOrDie(sizeof(char) * (msa->alen+1));
-        else
-          msa->ss[i] = NULL;
-        msa->sslen[i] = 0;
-      }
-      if (msa->sa != NULL) {
-        if (msa->alen != 0)
-          msa->sa[i] = MallocOrDie(sizeof(char) * (msa->alen+1));
-        else
-          msa->sa[i] = NULL;
-        msa->salen[i] = 0;
-      }
-    }
-
-  /* Reallocate and re-init for unparsed #=GS tags, if we have some.
-   * gs is [0..ngs-1][0..nseq-1][], so we're reallocing the middle
-   * set of pointers.
-   */
-  if (msa->gs != NULL)
-    for (i = 0; i < msa->ngs; i++)
-      {
-        if (msa->gs[i] != NULL)
-          {
-            msa->gs[i] = ReallocOrDie(msa->gs[i], sizeof(char *) * msa->nseqalloc);
-            for (j = first_new; j < msa->nseqalloc; j++)
-              msa->gs[i][j] = NULL;
-          }
-      }
-
-  /* Reallocate and re-init for unparsed #=GR tags, if we have some.
-   * gr is [0..ngr-1][0..nseq-1][], so we're reallocing the middle
-   * set of pointers.
-   */
-  if (msa->gr != NULL)
-    for (i = 0; i < msa->ngr; i++)
-      {
-        if (msa->gr[i] != NULL)
-          {
-            msa->gr[i] = ReallocOrDie(msa->gr[i], sizeof(char *) * msa->nseqalloc);
-            for (j = first_new; j < msa->nseqalloc; j++)
-              msa->gr[i][j] = NULL;
-          }
-      }
-
-  return;
-}
-
-/* Function: MSAFree()
- * Date: SRE, Tue May 18 11:20:16 1999 [St. Louis]
- *
- * Purpose: Free a multiple sequence alignment structure.
- *
- * Args: msa - the alignment
- *
- * Returns: (void)
- */
-void
-MSAFree(MSA *msa)
-{
-  /* Per-sequence string arrays: one string per stored sequence. */
-  Free2DArray((void **) msa->aseq,   msa->nseq);
-  Free2DArray((void **) msa->sqname, msa->nseq);
-  Free2DArray((void **) msa->sqacc,  msa->nseq);
-  Free2DArray((void **) msa->sqdesc, msa->nseq);
-  Free2DArray((void **) msa->ss,     msa->nseq);
-  Free2DArray((void **) msa->sa,     msa->nseq);
-
-  /* Flat arrays and single strings. free() accepts NULL, so absent
-   * fields need no special casing.
-   */
-  free(msa->sqlen);
-  free(msa->wgt);
-
-  free(msa->name);
-  free(msa->desc);
-  free(msa->acc);
-  free(msa->au);
-  free(msa->ss_cons);
-  free(msa->sa_cons);
-  free(msa->rf);
-  free(msa->sslen);
-  free(msa->salen);
-
-  /* Unparsed markup: comments, #=GF, #=GS, #=GC, #=GR. */
-  Free2DArray((void **) msa->comment, msa->ncomment);
-  Free2DArray((void **) msa->gf_tag,  msa->ngf);
-  Free2DArray((void **) msa->gf,      msa->ngf);
-  Free2DArray((void **) msa->gs_tag,  msa->ngs);
-  Free3DArray((void ***)msa->gs,      msa->ngs, msa->nseq);
-  Free2DArray((void **) msa->gc_tag,  msa->ngc);
-  Free2DArray((void **) msa->gc,      msa->ngc);
-  Free2DArray((void **) msa->gr_tag,  msa->ngr);
-  Free3DArray((void ***)msa->gr,      msa->ngr, msa->nseq);
-
-  /* Key indices: sequence names, then the three tag indices. */
-  GKIFree(msa->index);
-  GKIFree(msa->gs_idx);
-  GKIFree(msa->gc_idx);
-  GKIFree(msa->gr_idx);
-
-  free(msa);
-}
-
-
-/* Function: MSASetSeqAccession()
- * Date: SRE, Mon Jun 21 04:13:33 1999 [Sanger Centre]
- *
- * Purpose: Set a sequence accession in an MSA structure.
- * Handles some necessary allocation/initialization.
- *
- * Args: msa - multiple alignment to add accession to
- * seqidx - index of sequence to attach accession to
- * acc - accession
- *
- * Returns: void
- */
-void
-MSASetSeqAccession(MSA *msa, int seqidx, char *acc)
-{
-  /* The accession array is created on first use, with one (empty)
-   * entry for every allocated sequence slot.
-   */
-  if (msa->sqacc == NULL)
-    {
-      int slot;
-
-      msa->sqacc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-      slot = 0;
-      while (slot < msa->nseqalloc)
-        {
-          msa->sqacc[slot] = NULL;
-          slot++;
-        }
-    }
-
-  /* Store a private copy of the caller's string. */
-  msa->sqacc[seqidx] = sre_strdup(acc, -1);
-}
-
-/* Function: MSASetSeqDescription()
- * Date: SRE, Mon Jun 21 04:21:09 1999 [Sanger Centre]
- *
- * Purpose: Set a sequence description in an MSA structure.
- * Handles some necessary allocation/initialization.
- *
- * Args: msa - multiple alignment to add description to
- * seqidx - index of sequence to attach description to
- * desc - description
- *
- * Returns: void
- */
-void
-MSASetSeqDescription(MSA *msa, int seqidx, char *desc)
-{
-  /* The description array is created on first use, with one (empty)
-   * entry for every allocated sequence slot.
-   */
-  if (msa->sqdesc == NULL)
-    {
-      int slot;
-
-      msa->sqdesc = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-      slot = 0;
-      while (slot < msa->nseqalloc)
-        {
-          msa->sqdesc[slot] = NULL;
-          slot++;
-        }
-    }
-
-  /* Store a private copy of the caller's string. */
-  msa->sqdesc[seqidx] = sre_strdup(desc, -1);
-}
-
-
-/* Function: MSAAddComment()
- * Date: SRE, Tue Jun 1 17:37:21 1999 [St. Louis]
- *
- * Purpose: Add an (unparsed) comment line to the MSA structure,
- * allocating as necessary.
- *
- * Args: msa - a multiple alignment
- * s - comment line to add
- *
- * Returns: (void)
- */
-void
-MSAAddComment(MSA *msa, char *s)
-{
-  /* Comment storage is allocated on the first call and grown whenever
-   * it is full, in either case by a fixed increment of 10 lines.
-   */
-  if (msa->comment == NULL)
-    {
-      msa->alloc_ncomment = 10;
-      msa->comment        = MallocOrDie (sizeof(char *) * 10);
-    }
-  if (msa->ncomment == msa->alloc_ncomment)
-    {
-      msa->alloc_ncomment += 10;
-      msa->comment = ReallocOrDie(msa->comment, sizeof(char *) * msa->alloc_ncomment);
-    }
-
-  /* Append a private copy of the line. */
-  msa->comment[msa->ncomment] = sre_strdup(s, -1);
-  msa->ncomment += 1;
-  return;
-}
-
-/* Function: MSAAddGF()
- * Date: SRE, Wed Jun 2 06:53:54 1999 [bus to Madison]
- *
- * Purpose: Add an unparsed #=GF markup line to the MSA
- * structure, allocating as necessary.
- *
- * Args: msa - a multiple alignment
- * tag - markup tag (e.g. "AU")
- * value - free text markup (e.g. "Alex Bateman")
- *
- * Returns: (void)
- */
-void
-MSAAddGF(MSA *msa, char *tag, char *value)
-{
-  /* The tag and value arrays are kept the same size. They are
-   * allocated on the first unparsed #=GF line and grown whenever they
-   * are full, in either case by a fixed increment of 10 lines.
-   */
-  if (msa->gf_tag == NULL)
-    {
-      msa->alloc_ngf = 10;
-      msa->gf_tag    = MallocOrDie (sizeof(char *) * 10);
-      msa->gf        = MallocOrDie (sizeof(char *) * 10);
-    }
-  if (msa->ngf == msa->alloc_ngf)
-    {
-      msa->alloc_ngf += 10;
-      msa->gf_tag = ReallocOrDie(msa->gf_tag, sizeof(char *) * msa->alloc_ngf);
-      msa->gf     = ReallocOrDie(msa->gf,     sizeof(char *) * msa->alloc_ngf);
-    }
-
-  /* Append private copies of the tag and its free text. */
-  msa->gf_tag[msa->ngf] = sre_strdup(tag, -1);
-  msa->gf[msa->ngf]     = sre_strdup(value, -1);
-  msa->ngf += 1;
-
-  return;
-}
-
-
-/* Function: MSAAddGS()
- * Date: SRE, Wed Jun 2 06:57:03 1999 [St. Louis]
- *
- * Purpose: Add an unparsed #=GS markup line to the MSA
- * structure, allocating as necessary.
- *
- * It's possible that we could get more than one
- * of the same type of GS tag per sequence; for
- * example, "DR PDB;" structure links in Pfam.
- * Hack: handle these by appending to the string,
- * in a \n separated fashion.
- *
- * Args: msa - multiple alignment structure
- * tag - markup tag (e.g. "AC")
- * sqidx - index of sequence to assoc markup with (0..nseq-1)
- * value - markup (e.g. "P00666")
- *
- * Returns: (void)
- */
-void
-MSAAddGS(MSA *msa, char *tag, int sqidx, char *value)
-{
-  int tagidx;
-  int s;
-
-  /* Find the row for this tag name, creating the tag index and/or a
-   * new row when the name has not been seen before. Rows are added one
-   * at a time, and each row has one (initially empty) entry per
-   * allocated sequence slot.
-   */
-  if (msa->gs_tag != NULL)
-    {
-      tagidx = GKIKeyIndex(msa->gs_idx, tag);
-      if (tagidx < 0)
-        {                       /* a tag name we haven't stored yet */
-          tagidx = GKIStoreKey(msa->gs_idx, tag);
-          SQD_DASSERT1((tagidx == msa->ngs));
-          msa->gs_tag = ReallocOrDie(msa->gs_tag, (msa->ngs+1) * sizeof(char *));
-          msa->gs     = ReallocOrDie(msa->gs,     (msa->ngs+1) * sizeof(char **));
-          msa->gs[msa->ngs] = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-          for (s = 0; s < msa->nseqalloc; s++)
-            msa->gs[msa->ngs][s] = NULL;
-        }
-    }
-  else
-    {                           /* very first tag: start everything */
-      msa->gs_idx = GKIInit();
-      tagidx      = GKIStoreKey(msa->gs_idx, tag);
-      SQD_DASSERT1((tagidx == 0));
-      msa->gs_tag = MallocOrDie(sizeof(char *));
-      msa->gs     = MallocOrDie(sizeof(char **));
-      msa->gs[0]  = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-      for (s = 0; s < msa->nseqalloc; s++)
-        msa->gs[0][s] = NULL;
-    }
-
-  /* A row that was just created still needs its tag name recorded. */
-  if (tagidx == msa->ngs)
-    {
-      msa->gs_tag[tagidx] = sre_strdup(tag, -1);
-      msa->ngs++;
-    }
-
-  /* Store the value; a repeated tag on the same sequence is appended
-   * to what is already there, separated by a newline.
-   */
-  if (msa->gs[tagidx][sqidx] != NULL)
-    {
-      int len;
-
-      if ((len = sre_strcat(&(msa->gs[tagidx][sqidx]), -1, "\n", 1)) < 0)
-        Die("failed to sre_strcat()");
-      if (sre_strcat(&(msa->gs[tagidx][sqidx]), len, value, -1) < 0)
-        Die("failed to sre_strcat()");
-    }
-  else
-    msa->gs[tagidx][sqidx] = sre_strdup(value, -1);
-
-  return;
-}
-
-/* Function: MSAAppendGC()
- * Date: SRE, Thu Jun 3 06:25:14 1999 [Madison]
- *
- * Purpose: Add an unparsed #=GC markup line to the MSA
- * structure, allocating as necessary.
- *
- * When called multiple times for the same tag,
- * appends value strings together -- used when
- * parsing multiblock alignment files, for
- * example.
- *
- * Args: msa - multiple alignment structure
- * tag - markup tag (e.g. "CS")
- * value - markup, one char per aligned column
- *
- * Returns: (void)
- */
-void
-MSAAppendGC(MSA *msa, char *tag, char *value)
-{
-  int tagidx;
-
-  /* Find the slot for this tag name, creating the tag index and/or a
-   * new (empty) slot when the name has not been seen before. Slots are
-   * added one at a time.
-   */
-  if (msa->gc_tag != NULL)
-    {
-      tagidx = GKIKeyIndex(msa->gc_idx, tag);
-      if (tagidx < 0)
-        {                       /* a tag name we haven't stored yet */
-          tagidx = GKIStoreKey(msa->gc_idx, tag);
-          SQD_DASSERT1((tagidx == msa->ngc));
-          msa->gc_tag = ReallocOrDie(msa->gc_tag, (msa->ngc+1) * sizeof(char **));
-          msa->gc     = ReallocOrDie(msa->gc,     (msa->ngc+1) * sizeof(char **));
-          msa->gc[tagidx] = NULL;
-        }
-    }
-  else
-    {                           /* very first tag: start everything */
-      msa->gc_tag = MallocOrDie(sizeof(char *));
-      msa->gc     = MallocOrDie(sizeof(char *));
-      msa->gc_idx = GKIInit();
-      tagidx      = GKIStoreKey(msa->gc_idx, tag);
-      SQD_DASSERT1((tagidx == 0));
-      msa->gc[0]  = NULL;
-    }
-
-  /* A slot that was just created still needs its tag name recorded. */
-  if (tagidx == msa->ngc)
-    {
-      msa->gc_tag[tagidx] = sre_strdup(tag, -1);
-      msa->ngc++;
-    }
-
-  /* Append this block's markup to whatever the tag already holds. */
-  sre_strcat(&(msa->gc[tagidx]), -1, value, -1);
-  return;
-}
-
-/* Function: MSAGetGC()
- * Date: SRE, Fri Aug 13 13:25:57 1999 [St. Louis]
- *
- * Purpose: Given a tagname for a miscellaneous #=GC column
- * annotation, return a pointer to the annotation
- * string.
- *
- * Args: msa - alignment and its annotation
- * tag - name of the annotation
- *
- * Returns: ptr to the annotation string. Caller does *not*
- * free; is managed by msa object still.
- */
-char *
-MSAGetGC(MSA *msa, char *tag)
-{
-  int tagidx;
-
-  /* With no tag index there is nothing to look up; otherwise a
-   * negative lookup result means the tag is not present.
-   */
-  if (msa->gc_idx != NULL)
-    {
-      tagidx = GKIKeyIndex(msa->gc_idx, tag);
-      if (tagidx >= 0)
-        return msa->gc[tagidx];
-    }
-  return NULL;
-}
-
-
-/* Function: MSAAppendGR()
- * Date: SRE, Thu Jun 3 06:34:38 1999 [Madison]
- *
- * Purpose: Add an unparsed #=GR markup line to the
- * MSA structure, allocating as necessary.
- *
- * When called multiple times for the same tag,
- * appends value strings together -- used when
- * parsing multiblock alignment files, for
- * example.
- *
- * Args: msa - multiple alignment structure
- * tag - markup tag (e.g. "SS")
- * sqidx - index of seq to assoc markup with (0..nseq-1)
- * value - markup, one char per aligned column
- *
- * Returns: (void)
- */
-void
-MSAAppendGR(MSA *msa, char *tag, int sqidx, char *value)
-{
-  int tagidx;
-  int s;
-
-  /* Find the row for this tag name, creating the tag index and/or a
-   * new row when the name has not been seen before. Rows are added one
-   * at a time, and each row has one (initially empty) entry per
-   * allocated sequence slot.
-   */
-  if (msa->gr_tag != NULL)
-    {
-      tagidx = GKIKeyIndex(msa->gr_idx, tag);
-      if (tagidx < 0)
-        {                       /* a tag name we haven't stored yet */
-          tagidx = GKIStoreKey(msa->gr_idx, tag);
-          SQD_DASSERT1((tagidx == msa->ngr));
-          msa->gr_tag = ReallocOrDie(msa->gr_tag, (msa->ngr+1) * sizeof(char *));
-          msa->gr     = ReallocOrDie(msa->gr,     (msa->ngr+1) * sizeof(char **));
-          msa->gr[msa->ngr] = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-          for (s = 0; s < msa->nseqalloc; s++)
-            msa->gr[msa->ngr][s] = NULL;
-        }
-    }
-  else
-    {                           /* very first tag: start everything */
-      msa->gr_tag = MallocOrDie(sizeof(char *));
-      msa->gr     = MallocOrDie(sizeof(char **));
-      msa->gr[0]  = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-      for (s = 0; s < msa->nseqalloc; s++)
-        msa->gr[0][s] = NULL;
-      msa->gr_idx = GKIInit();
-      tagidx      = GKIStoreKey(msa->gr_idx, tag);
-      SQD_DASSERT1((tagidx == 0));
-    }
-
-  /* A row that was just created still needs its tag name recorded. */
-  if (tagidx == msa->ngr)
-    {
-      msa->gr_tag[tagidx] = sre_strdup(tag, -1);
-      msa->ngr++;
-    }
-
-  /* Append this block's markup to whatever the sequence already holds. */
-  sre_strcat(&(msa->gr[tagidx][sqidx]), -1, value, -1);
-  return;
-}
-
-
-/* Function: MSAVerifyParse()
- * Date: SRE, Sat Jun 5 14:24:24 1999 [Madison, 1999 worm mtg]
- *
- * Purpose: Last function called after a multiple alignment is
- * parsed. Checks that parse was successful; makes sure
- * required information is present; makes sure required
- * information is consistent. Some fields that are
- * only used during parsing may be freed (sqlen, for
- * example).
- *
- * Some fields in msa may be modified (msa->alen is set,
- * for example).
- *
- * Args: msa - the multiple alignment
- * sqname, aseq must be set
- * nseq must be correct
- * alen need not be set; will be set here.
- * wgt will be set here if not already set
- *
- * Returns: (void)
- * Will Die() here with diagnostics on error.
- *
- * Example:
- */
-void
-MSAVerifyParse(MSA *msa)
-{
-  int   idx;
-  int   len;                    /* length of a consensus annotation line */
-  char *aliname;                /* alignment name for diagnostics; "" if unnamed */
-
-  aliname = (msa->name != NULL) ? msa->name : "";
-
-  if (msa->nseq == 0) Die("Parse error: no sequences were found for alignment %s",
-                          aliname);
-
-  /* The first sequence defines the alignment length. */
-  msa->alen = msa->sqlen[0];
-
-  /* We can rely on msa->sqname[] being valid for any index,
-   * because of the way the line parsers always store any name
-   * they add to the index.
-   */
-  for (idx = 0; idx < msa->nseq; idx++)
-    {
-      /* aseq is required. */
-      if (msa->aseq[idx] == NULL)
-        Die("Parse error: No sequence for %s in alignment %s", msa->sqname[idx],
-            aliname);
-      /* either all weights must be set, or none of them */
-      if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0)
-        Die("Parse error: some weights are set, but %s doesn't have one in alignment %s",
-            msa->sqname[idx],
-            aliname);
-      /* all aseq must be same length. */
-      if (msa->sqlen[idx] != msa->alen)
-        Die("Parse error: sequence %s: length %d, expected %d in alignment %s",
-            msa->sqname[idx], msa->sqlen[idx], msa->alen,
-            aliname);
-      /* if SS is present, must have length right */
-      if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen)
-        Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s",
-            msa->sqname[idx], msa->sslen[idx], msa->alen,
-            aliname);
-      /* if SA is present, must have length right */
-      if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen)
-        Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s",
-            msa->sqname[idx], msa->salen[idx], msa->alen,
-            aliname);
-    }
-
-  /* The consensus lines are checked with strlen(). Its size_t result
-   * is converted to int once, so that the comparison against alen is
-   * int-to-int and the value matches the %d conversion in the message.
-   */
-
-  /* if cons SS is present, must have length right */
-  if (msa->ss_cons != NULL && (len = (int) strlen(msa->ss_cons)) != msa->alen)
-    Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s",
-        len, msa->alen,
-        aliname);
-
-  /* if cons SA is present, must have length right */
-  if (msa->sa_cons != NULL && (len = (int) strlen(msa->sa_cons)) != msa->alen)
-    Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s",
-        len, msa->alen,
-        aliname);
-
-  /* if RF is present, must have length right */
-  if (msa->rf != NULL && (len = (int) strlen(msa->rf)) != msa->alen)
-    Die("Parse error: #=GC RF annotation: length %d, expected %d in alignment %s",
-        len, msa->alen,
-        aliname);
-
-  /* If no weights were given, every sequence gets the default weight. */
-  if (!(msa->flags & MSA_SET_WGT))
-    FSet(msa->wgt, msa->nseq, 1.0);
-
-  /* Clean up a little from the parser: the per-sequence length arrays
-   * are released and their pointers cleared.
-   */
-  if (msa->sqlen != NULL) { free(msa->sqlen); msa->sqlen = NULL; }
-  if (msa->sslen != NULL) { free(msa->sslen); msa->sslen = NULL; }
-  if (msa->salen != NULL) { free(msa->salen); msa->salen = NULL; }
-
-  return;
-}
-
-
-
-
-/* Function: MSAFileOpen()
- * Date: SRE, Tue May 18 13:22:01 1999 [St. Louis]
- *
- * Purpose: Open an alignment database file and prepare
- * for reading one alignment, or sequentially
- * in the (rare) case of multiple MSA databases
- * (e.g. Stockholm format).
- *
- * Args: filename - name of file to open
- * if "-", read stdin
- * if it ends in ".gz", read from pipe to gunzip -dc
- * format - format of file (e.g. MSAFILE_STOCKHOLM)
- * env - environment variable for path (e.g. BLASTDB)
- *
- * Returns: opened MSAFILE * on success.
- * NULL on failure:
- * usually, because the file doesn't exist;
- * for gzip'ed files, may also mean that gzip isn't in the path.
- */
-MSAFILE *
-MSAFileOpen(char *filename, int format, char *env)
-{
-  MSAFILE *afp;
-
-  afp = MallocOrDie(sizeof(MSAFILE));
-  if (strcmp(filename, "-") == 0)
-    {
-      afp->f        = stdin;
-      afp->do_stdin = TRUE;
-      afp->do_gzip  = FALSE;
-      afp->fname    = sre_strdup("[STDIN]", -1);
-      afp->ssi      = NULL;     /* can't index stdin because we can't seek*/
-    }
-#ifndef SRE_STRICT_ANSI
-  /* popen(), pclose() aren't portable to non-POSIX systems; disable */
-  else if (Strparse("^.*\\.gz$", filename, 0))
-    {
-      char cmd[256];
-
-      /* Note that popen() will return "successfully"
-       * if file doesn't exist, because gzip works fine
-       * and prints an error! So we have to check for
-       * existence of file ourself.
-       */
-      if (! FileExists(filename))
-        Die("%s: file does not exist", filename);
-      if (strlen(filename) + strlen("gzip -dc ") >= 256)
-        Die("filename > 255 char in MSAFileOpen()");
-      /* NOTE(review): filename is inserted unquoted into a command line
-       * that popen() hands to the shell, so shell metacharacters in the
-       * name would be interpreted. Left as is; confirm that callers
-       * only pass trusted names.
-       */
-      sprintf(cmd, "gzip -dc %s", filename);
-      if ((afp->f = popen(cmd, "r")) == NULL)
-        {
-          free(afp);            /* nothing else is attached to afp yet */
-          return NULL;
-        }
-
-      afp->do_stdin = FALSE;
-      afp->do_gzip  = TRUE;
-      afp->fname    = sre_strdup(filename, -1);
-      /* we can't index a .gz file, because we can't seek in a pipe afaik */
-      afp->ssi      = NULL;
-    }
-#endif /*SRE_STRICT_ANSI*/
-  else
-    {
-      char *ssifile;
-      char *dir;
-
-      /* When we open a file, it may be either in the current
-       * directory, or in the directory indicated by the env
-       * argument - and we have to construct the SSI filename accordingly.
-       */
-      if ((afp->f = fopen(filename, "r")) != NULL)
-        {
-          ssifile = MallocOrDie(sizeof(char) * (strlen(filename) + 5));
-          sprintf(ssifile, "%s.ssi", filename);
-        }
-      else if ((afp->f = EnvFileOpen(filename, env, &dir)) != NULL)
-        {
-          char *full;
-
-          full    = FileConcat(dir, filename);
-          /* room for the full path, ".ssi", and the terminating NUL */
-          ssifile = MallocOrDie(sizeof(char) * (strlen(full) + 5));
-          sprintf(ssifile, "%s.ssi", full);
-          /* full is not stored anywhere, so it is released here (assumes
-           * FileConcat() returns heap storage, as dir from EnvFileOpen()
-           * already is treated).
-           */
-          free(full);
-          free(dir);
-        }
-      else
-        {
-          free(afp);            /* nothing else is attached to afp yet */
-          return NULL;
-        }
-
-      afp->do_stdin = FALSE;
-      afp->do_gzip  = FALSE;
-      afp->fname    = sre_strdup(filename, -1);
-      afp->ssi      = NULL;
-
-      /* Open the SSI index file. If it doesn't exist, or
-       * it's corrupt, or some error happens, afp->ssi stays NULL.
-       */
-      SSIOpen(ssifile, &(afp->ssi));
-      free(ssifile);
-    }
-
-  /* Invoke autodetection if we haven't already been told what
-   * to expect.
-   */
-  if (format == MSAFILE_UNKNOWN)
-    {
-      if (afp->do_stdin == TRUE || afp->do_gzip)
-        Die("Can't autodetect alignment file format from a stdin or gzip pipe");
-      format = MSAFileFormat(afp);
-      if (format == MSAFILE_UNKNOWN)
-        Die("Can't determine format of multiple alignment file %s", afp->fname);
-    }
-
-  afp->format     = format;
-  afp->linenumber = 0;
-  afp->buf        = NULL;
-  afp->buflen     = 0;
-
-  return afp;
-}
-
-
-/* Function: MSAFilePositionByKey()
- * MSAFilePositionByIndex()
- * MSAFileRewind()
- *
- * Date: SRE, Tue Nov 9 19:02:54 1999 [St. Louis]
- *
- * Purpose: Family of functions for repositioning in
- * open MSA files; analogous to a similarly
- * named function series in HMMER's hmmio.c.
- *
- * Args: afp - open alignment file
- * offset - disk offset in bytes
- * key - key to look up in SSI indices
- * idx - index of alignment.
- *
- * Returns: 0 on failure.
- * 1 on success.
- * If called on a non-fseek()'able file (e.g. a gzip'ed
- * or pipe'd alignment), returns 0 as a failure flag.
- */
-int
-MSAFileRewind(MSAFILE *afp)
-{
-  /* Only streams that are neither a gzip pipe nor stdin are rewound. */
-  if (! afp->do_gzip && ! afp->do_stdin)
-    {
-      rewind(afp->f);
-      return 1;
-    }
-  return 0;
-}
-int
-MSAFilePositionByKey(MSAFILE *afp, char *key)
-{
-  SSIOFFSET where;              /* offset of the alignment named by key */
-  int       unused_fh;          /* filehandle is returned but not used  */
-
-  /* Fails without an index, when the key lookup fails, or when the
-   * file cannot be positioned; the checks run in that order.
-   */
-  if (afp->ssi == NULL
-      || SSIGetOffsetByName(afp->ssi, key, &unused_fh, &where) != 0
-      || SSISetFilePosition(afp->f, &where) != 0)
-    return 0;
-  return 1;
-}
-int
-MSAFilePositionByIndex(MSAFILE *afp, int idx)
-{
-  SSIOFFSET where;              /* disk offset of the requested alignment */
-  int       unused_fh;          /* filehandle is returned but not used    */
-
-  /* Fails without an index, when the number lookup fails, or when the
-   * file cannot be positioned; the checks run in that order.
-   */
-  if (afp->ssi == NULL
-      || SSIGetOffsetByNumber(afp->ssi, idx, &unused_fh, &where) != 0
-      || SSISetFilePosition(afp->f, &where) != 0)
-    return 0;
-  return 1;
-}
-
-
-/* Function: MSAFileRead()
- * Date: SRE, Fri May 28 16:01:43 1999 [St. Louis]
- *
- * Purpose: Read the next msa from an open alignment file.
- * This is a wrapper around format-specific calls.
- *
- * Args: afp - open alignment file
- *
- * Returns: next alignment, or NULL if out of alignments
- */
-MSA *
-MSAFileRead(MSAFILE *afp)
-{
-  int fmt = afp->format;
-
-  /* Hand the file to the reader that matches its recorded format. */
-  if (fmt == MSAFILE_STOCKHOLM) return ReadStockholm(afp);
-  if (fmt == MSAFILE_MSF)       return ReadMSF(afp);
-  if (fmt == MSAFILE_A2M)       return ReadA2M(afp);
-  if (fmt == MSAFILE_CLUSTAL)   return ReadClustal(afp);
-  if (fmt == MSAFILE_SELEX)     return ReadSELEX(afp);
-  if (fmt == MSAFILE_PHYLIP)    return ReadPhylip(afp);
-
-  /* Any other format code is treated as a corrupted MSAFILE. */
-  Die("MSAFILE corrupted: bad format index");
-  return NULL;
-}
-
-/* Function: MSAFileClose()
- * Date: SRE, Tue May 18 14:05:28 1999 [St. Louis]
- *
- * Purpose: Close an open MSAFILE.
- *
- * Args: afp - ptr to an open MSAFILE.
- *
- * Returns: void
- */
-void
-MSAFileClose(MSAFILE *afp)
-{
-  /* Close the stream exactly once, with the call that matches how it
-   * was opened: pclose() for a gzip pipe, fclose() for a regular file.
-   * stdin is left open.
-   */
-#ifndef SRE_STRICT_ANSI /* gzip functionality only on POSIX systems */
-  if (afp->do_gzip) pclose(afp->f);
-  else
-#endif
-  if (! afp->do_stdin) fclose(afp->f);
-
-  if (afp->buf   != NULL) free(afp->buf);
-  if (afp->ssi   != NULL) SSIClose(afp->ssi);
-  if (afp->fname != NULL) free(afp->fname);
-  free(afp);
-}
-
-char *
-MSAFileGetLine(MSAFILE *afp)
-{
-  /* Read the next line into afp's own buffer; NULL from the reader is
-   * passed on as NULL. Otherwise count the line and return the buffer.
-   */
-  if (sre_fgets(&(afp->buf), &(afp->buflen), afp->f) == NULL)
-    return NULL;
-
-  afp->linenumber += 1;
-  return afp->buf;
-}
-
-void
-MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline)
-{
-  /* Hand the alignment to the writer for the requested format.
-   * Stockholm has two writers, chosen by do_oneline.
-   */
-  if      (outfmt == MSAFILE_A2M)     WriteA2M(fp, msa);
-  else if (outfmt == MSAFILE_CLUSTAL) WriteClustal(fp, msa);
-  else if (outfmt == MSAFILE_MSF)     WriteMSF(fp, msa);
-  else if (outfmt == MSAFILE_PHYLIP)  WritePhylip(fp, msa);
-  else if (outfmt == MSAFILE_SELEX)   WriteSELEX(fp, msa);
-  else if (outfmt == MSAFILE_STOCKHOLM)
-    {
-      if (do_oneline) WriteStockholmOneBlock(fp, msa);
-      else            WriteStockholm(fp, msa);
-    }
-  else
-    Die("can't write. no such alignment format %d\n", outfmt);
-}
-
-/* Function: MSAGetSeqidx()
- * Date: SRE, Wed May 19 15:08:25 1999 [St. Louis]
- *
- * Purpose: From a sequence name, return seqidx appropriate
- * for an MSA structure.
- *
- * 1) try to guess the index. (pass -1 if you can't guess)
- * 2) Look up name in msa's hashtable.
- * 3) If it's a new name, store in msa's hashtable;
- * expand allocs as needed;
- * save sqname.
- *
- * Args: msa - alignment object
- * name - a sequence name
- * guess - a guess at the right index, or -1 if no guess.
- *
- * Returns: seqidx
- */
-int
-MSAGetSeqidx(MSA *msa, char *name, int guess)
-{
-  int seqidx;
-
-  /* A guess is accepted only if it is a stored index whose name matches. */
-  if (guess >= 0 && guess < msa->nseq && strcmp(name, msa->sqname[guess]) == 0)
-    return guess;
-
-  /* Otherwise look the name up; a negative result means it is new. */
-  seqidx = GKIKeyIndex(msa->index, name);
-  if (seqidx < 0)
-    {
-      /* New name: index it, expand the allocation if the new index
-       * falls outside it, keep a copy of the name, and count it.
-       */
-      seqidx = GKIStoreKey(msa->index, name);
-      if (seqidx >= msa->nseqalloc) MSAExpand(msa);
-
-      msa->sqname[seqidx] = sre_strdup(name, -1);
-      msa->nseq++;
-    }
-  return seqidx;
-}
-
-
-/* Function: MSAFromAINFO()
- * Date: SRE, Mon Jun 14 11:22:24 1999 [St. Louis]
- *
- * Purpose: Convert the old aseq/ainfo alignment structure
- * to new MSA structure. Enables more rapid conversion
- * of codebase to the new world order.
- *
- * Args: aseq - [0..nseq-1][0..alen-1] alignment
- * ainfo - old-style optional info
- *
- * Returns: MSA *
- */
-MSA *
-MSAFromAINFO(char **aseq, AINFO *ainfo)
-{
-  MSA *msa;
-  int  idx;                     /* sequence being converted          */
-  int  k;                       /* slot counter for array initializing */
-
-  msa = MSAAlloc(ainfo->nseq, ainfo->alen);
-
-  for (idx = 0; idx < ainfo->nseq; idx++)
-    {
-      /* Required per-sequence data: row, weight, name (also indexed). */
-      strcpy(msa->aseq[idx], aseq[idx]);
-      msa->wgt[idx]    = ainfo->wgt[idx];
-      msa->sqname[idx] = sre_strdup(ainfo->sqinfo[idx].name, -1);
-      msa->sqlen[idx]  = msa->alen;
-      GKIStoreKey(msa->index, msa->sqname[idx]);
-
-      /* Optional accession and description. */
-      if (ainfo->sqinfo[idx].flags & SQINFO_ACC)
-        MSASetSeqAccession(msa, idx, ainfo->sqinfo[idx].acc);
-
-      if (ainfo->sqinfo[idx].flags & SQINFO_DESC)
-        MSASetSeqDescription(msa, idx, ainfo->sqinfo[idx].desc);
-
-      /* Optional SS annotation; its arrays are created the first time
-       * any sequence turns out to have one.
-       */
-      if (ainfo->sqinfo[idx].flags & SQINFO_SS)
-        {
-          if (msa->ss == NULL)
-            {
-              msa->ss    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-              msa->sslen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
-              k = 0;
-              while (k < msa->nseqalloc)
-                {
-                  msa->ss[k]    = NULL;
-                  msa->sslen[k] = 0;
-                  k++;
-                }
-            }
-          MakeAlignedString(msa->aseq[idx], msa->alen, ainfo->sqinfo[idx].ss, &(msa->ss[idx]));
-          msa->sslen[idx] = msa->alen;
-        }
-
-      /* Optional SA annotation, handled the same way. */
-      if (ainfo->sqinfo[idx].flags & SQINFO_SA)
-        {
-          if (msa->sa == NULL)
-            {
-              msa->sa    = MallocOrDie(sizeof(char *) * msa->nseqalloc);
-              msa->salen = MallocOrDie(sizeof(int)    * msa->nseqalloc);
-              k = 0;
-              while (k < msa->nseqalloc)
-                {
-                  msa->sa[k]    = NULL;
-                  msa->salen[k] = 0;
-                  k++;
-                }
-            }
-          MakeAlignedString(msa->aseq[idx], msa->alen, ainfo->sqinfo[idx].sa, &(msa->sa[idx]));
-          msa->salen[idx] = msa->alen;
-        }
-    }
-
-  /* Alignment-wide strings.
-   * note that sre_strdup() returns NULL when passed NULL
-   */
-  msa->name    = sre_strdup(ainfo->name, -1);
-  msa->desc    = sre_strdup(ainfo->desc, -1);
-  msa->acc     = sre_strdup(ainfo->acc,  -1);
-  msa->au      = sre_strdup(ainfo->au,   -1);
-  msa->ss_cons = sre_strdup(ainfo->cs,   -1);
-  msa->rf      = sre_strdup(ainfo->rf,   -1);
-
-  /* Score cutoffs: each pair is copied only when its flag is set. */
-  if (ainfo->flags & AINFO_TC)
-    {
-      msa->cutoff[MSA_CUTOFF_TC1]        = ainfo->tc1;
-      msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
-      msa->cutoff[MSA_CUTOFF_TC2]        = ainfo->tc2;
-      msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
-    }
-  if (ainfo->flags & AINFO_NC)
-    {
-      msa->cutoff[MSA_CUTOFF_NC1]        = ainfo->nc1;
-      msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
-      msa->cutoff[MSA_CUTOFF_NC2]        = ainfo->nc2;
-      msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
-    }
-  if (ainfo->flags & AINFO_GA)
-    {
-      msa->cutoff[MSA_CUTOFF_GA1]        = ainfo->ga1;
-      msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
-      msa->cutoff[MSA_CUTOFF_GA2]        = ainfo->ga2;
-      msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
-    }
-
-  /* Final dimensions. */
-  msa->nseq = ainfo->nseq;
-  msa->alen = ainfo->alen;
-  return msa;
-}
-
-
-
-
-/* Function: MSAFileFormat()
- * Date:     SRE, Fri Jun 18 14:26:49 1999 [Sanger Centre]
- *
- * Purpose:  (Attempt to) work out which alignment format an open
- *           file is in. The guess is delegated to SeqfileFormat()
- *           from sqio.c; because that routine rewinds the file
- *           pointer when it's done, this cannot be used on a pipe
- *           or gzip'ed file. A FASTA answer is reinterpreted as
- *           A2M (aligned FASTA). Any other recognized format that
- *           is not an alignment format is a fatal error.
- *
- * Args:     afp  - open alignment file to evaluate
- *
- * Returns:  format code; e.g. MSAFILE_STOCKHOLM, or MSAFILE_UNKNOWN
- *           if the format could not be determined.
- */
-int
-MSAFileFormat(MSAFILE *afp)
-{
-  int fmt = SeqfileFormat(afp->f);
-
-  /* in an alignment context, FASTA means aligned FASTA */
-  if (fmt == SQFILE_FASTA)
-    fmt = MSAFILE_A2M;
-
-  /* unknown formats and genuine alignment formats pass straight through */
-  if (fmt == MSAFILE_UNKNOWN || IsAlignmentFormat(fmt))
-    return fmt;
-
-  Die("File %s does not appear to be an alignment file;\n\
-rather, it appears to be an unaligned file in %s format.\n\
-I'm expecting an alignment file in this context.\n",
-      afp->fname,
-      SeqfileFormat2String(fmt));
-  return fmt;
-}
-
-
-/* Function: MSAMingap()
- * Date:     SRE, Mon Jun 28 18:57:54 1999 [on jury duty, St. Louis Civil Court]
- *
- * Purpose:  Drop every column that consists solely of gap characters
- *           from an alignment, along with the matching positions of
- *           its per-residue and per-column annotation. The actual
- *           compaction is done by MSAShorterAlignment().
- *
- * Args:     msa - the alignment; modified in place
- *
- * Returns:  (void)
- */
-void
-MSAMingap(MSA *msa)
-{
-  int *keep;                    /* per-column TRUE/FALSE: keep this column? */
-  int  col;                     /* column in the original alignment */
-  int  seq;                     /* sequence index */
-  int  all_gaps;                /* TRUE until a residue is seen in this column */
-
-  keep = MallocOrDie(sizeof(int) * msa->alen);
-  for (col = 0; col < msa->alen; col++)
-    {
-      all_gaps = TRUE;
-      for (seq = 0; seq < msa->nseq && all_gaps; seq++)
-        if (! isgap(msa->aseq[seq][col]))
-          all_gaps = FALSE;
-
-      if (all_gaps) keep[col] = FALSE;
-      else          keep[col] = TRUE;
-    }
-  MSAShorterAlignment(msa, keep);
-  free(keep);
-}
-
-/* Function: MSANogap()
- * Date:     SRE, Wed Nov 17 09:59:51 1999 [St. Louis]
- *
- * Purpose:  Keep only the columns of an alignment in which no
- *           sequence has a gap character; every column containing
- *           at least one gap is removed. Used for filtering before
- *           phylogenetic analysis. The actual compaction is done by
- *           MSAShorterAlignment().
- *
- * Args:     msa - the alignment
- *
- * Returns:  (void). The alignment is modified in place, so make a
- *           copy first if the original is still needed.
- */
-void
-MSANogap(MSA *msa)
-{
-  int *keep;                    /* per-column TRUE/FALSE: keep this column? */
-  int  col;                     /* column in the original alignment */
-  int  seq;                     /* sequence index */
-  int  has_gap;                 /* TRUE once a gap is seen in this column */
-
-  keep = MallocOrDie(sizeof(int) * msa->alen);
-  for (col = 0; col < msa->alen; col++)
-    {
-      has_gap = FALSE;
-      for (seq = 0; seq < msa->nseq && ! has_gap; seq++)
-        if (isgap(msa->aseq[seq][col]))
-          has_gap = TRUE;
-
-      if (has_gap) keep[col] = FALSE;
-      else         keep[col] = TRUE;
-    }
-  MSAShorterAlignment(msa, keep);
-  free(keep);
-}
-
-
-/* Function: MSAShorterAlignment()
- * Date:     SRE, Wed Nov 17 09:49:32 1999 [St. Louis]
- *
- * Purpose:  Compact an alignment in place, keeping only the columns
- *           whose useme[] flag is not FALSE. Kept columns are slid
- *           leftwards over the dropped ones in the aligned sequences
- *           and in every per-residue (ss, sa, #=GR) and per-column
- *           (ss_cons, sa_cons, rf, #=GC) annotation line that is
- *           present. Everything is then NUL-terminated at the new
- *           length, and msa->alen is updated.
- *
- * Args:     msa   - the alignment. It is changed in place, so make
- *                   a copy first if the original is still needed.
- *           useme - TRUE/FALSE flags for columns to keep: 0..alen-1
- *
- * Returns:  (void)
- */
-void
-MSAShorterAlignment(MSA *msa, int *useme)
-{
-  int src;                      /* column read from the original alignment */
-  int dst;                      /* column written in the compacted alignment */
-  int seq;                      /* sequence index */
-  int tag;                      /* markup tag index */
-
-  /* The result is never longer than the input, so the existing
-   * storage is simply overwritten from the left.
-   */
-  dst = 0;
-  for (src = 0; src < msa->alen; src++)
-    {
-      if (useme[src] == FALSE) continue;
-
-      /* Until the first dropped column, kept columns are already in place. */
-      if (dst != src)
-        {
-          /* per-sequence lines: residues and per-residue markup */
-          for (seq = 0; seq < msa->nseq; seq++)
-            {
-              msa->aseq[seq][dst] = msa->aseq[seq][src];
-              if (msa->ss != NULL && msa->ss[seq] != NULL)
-                msa->ss[seq][dst] = msa->ss[seq][src];
-              if (msa->sa != NULL && msa->sa[seq] != NULL)
-                msa->sa[seq][dst] = msa->sa[seq][src];
-
-              for (tag = 0; tag < msa->ngr; tag++)
-                if (msa->gr[tag][seq] != NULL)
-                  msa->gr[tag][seq][dst] = msa->gr[tag][seq][src];
-            }
-
-          /* per-column lines */
-          if (msa->ss_cons != NULL) msa->ss_cons[dst] = msa->ss_cons[src];
-          if (msa->sa_cons != NULL) msa->sa_cons[dst] = msa->sa_cons[src];
-          if (msa->rf      != NULL) msa->rf[dst]      = msa->rf[src];
-
-          for (tag = 0; tag < msa->ngc; tag++)
-            msa->gc[tag][dst] = msa->gc[tag][src];
-        }
-      dst++;
-    }
-
-  msa->alen = dst;              /* number of columns that survived */
-
-  /* NUL-terminate every line at the new length */
-  for (seq = 0; seq < msa->nseq; seq++)
-    {
-      msa->aseq[seq][dst] = '\0';
-      if (msa->ss != NULL && msa->ss[seq] != NULL) msa->ss[seq][dst] = '\0';
-      if (msa->sa != NULL && msa->sa[seq] != NULL) msa->sa[seq][dst] = '\0';
-
-      for (tag = 0; tag < msa->ngr; tag++)
-        if (msa->gr[tag][seq] != NULL)
-          msa->gr[tag][seq][dst] = '\0';
-    }
-
-  if (msa->ss_cons != NULL) msa->ss_cons[dst] = '\0';
-  if (msa->sa_cons != NULL) msa->sa_cons[dst] = '\0';
-  if (msa->rf      != NULL) msa->rf[dst]      = '\0';
-
-  for (tag = 0; tag < msa->ngc; tag++)
-    msa->gc[tag][dst] = '\0';
-}
-
-
-/* Function: MSASmallerAlignment()
- * Date:     SRE, Wed Jun 30 09:56:08 1999 [St. Louis]
- *
- * Purpose:  Given an array "useme" of TRUE/FALSE flags for
- *           each sequence in an alignment, construct
- *           and return a new alignment containing only
- *           those sequences that are flagged useme=TRUE.
- *           Columns that become all-gap in the subset are
- *           removed from the new alignment with MSAMingap().
- *
- *           Used by routines such as MSAFilterAlignment()
- *           and MSASampleAlignment().
- *
- * Limitations:
- *           Does not copy unparsed Stockholm markup.
- *
- *           Does not make assumptions about meaning of wgt;
- *           if you want the new wgt vector renormalized, do
- *           it yourself with FNorm(new->wgt, new->nseq).
- *
- * Args:     msa     -- the original (larger) alignment
- *           useme   -- [0..nseq-1] array of TRUE/FALSE flags; TRUE means include
- *                      this seq in new alignment
- *           ret_new -- RETURN: new alignment, or NULL if no sequence
- *                      was flagged
- *
- * Returns:  void
- *           ret_new is allocated here; free with MSAFree()
- */
-void
-MSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new)
-{
-  MSA *new;                     /* RETURN: new alignment */
-  int  nnew;                    /* number of seqs in new msa (e.g. # of TRUEs) */
-  int  oidx, nidx;              /* old, new indices */
-  int  i;
-
-  nnew = 0;
-  for (oidx = 0; oidx < msa->nseq; oidx++)
-    if (useme[oidx]) nnew++;
-  if (nnew == 0) { *ret_new = NULL; return; }
-
-  new  = MSAAlloc(nnew, 0);
-  nidx = 0;
-  for (oidx = 0; oidx < msa->nseq; oidx++)
-    if (useme[oidx])
-      {
-        new->aseq[nidx]   = sre_strdup(msa->aseq[oidx],   msa->alen);
-        /* A name's length is unrelated to the alignment length; pass -1
-         * (as the other sre_strdup() callers do for strings of unknown
-         * length) rather than msa->alen.
-         */
-        new->sqname[nidx] = sre_strdup(msa->sqname[oidx], -1);
-        GKIStoreKey(new->index, msa->sqname[oidx]);
-        new->wgt[nidx]    = msa->wgt[oidx];
-        if (msa->sqacc != NULL)
-          MSASetSeqAccession(new, nidx, msa->sqacc[oidx]);
-        if (msa->sqdesc != NULL)
-          MSASetSeqDescription(new, nidx, msa->sqdesc[oidx]);
-
-        /* The per-sequence annotation arrays are created lazily. They are
-         * sized with nnew (new->nseq is not assigned until after this loop)
-         * and NULL-filled, because only some sequences may carry annotation
-         * and MSAShorterAlignment() tests each entry against NULL.
-         */
-        if (msa->ss != NULL && msa->ss[oidx] != NULL)
-          {
-            if (new->ss == NULL)
-              {
-                new->ss = MallocOrDie(sizeof(char *) * nnew);
-                for (i = 0; i < nnew; i++) new->ss[i] = NULL;
-              }
-            new->ss[nidx] = sre_strdup(msa->ss[oidx], -1);
-          }
-        if (msa->sa != NULL && msa->sa[oidx] != NULL)
-          {
-            if (new->sa == NULL)
-              {
-                new->sa = MallocOrDie(sizeof(char *) * nnew);
-                for (i = 0; i < nnew; i++) new->sa[i] = NULL;
-              }
-            new->sa[nidx] = sre_strdup(msa->sa[oidx], -1);
-          }
-        nidx++;
-      }
-
-  new->nseq    = nnew;
-  new->alen    = msa->alen;
-  new->flags   = msa->flags;
-  new->type    = msa->type;
-  /* sre_strdup() is documented elsewhere in this file to return NULL for NULL */
-  new->name    = sre_strdup(msa->name, -1);
-  new->desc    = sre_strdup(msa->desc, -1);
-  new->acc     = sre_strdup(msa->acc, -1);
-  new->au      = sre_strdup(msa->au, -1);
-  new->ss_cons = sre_strdup(msa->ss_cons, -1);
-  new->sa_cons = sre_strdup(msa->sa_cons, -1);
-  new->rf      = sre_strdup(msa->rf, -1);
-  for (i = 0; i < MSA_MAXCUTOFFS; i++) {
-    new->cutoff[i]        = msa->cutoff[i];
-    new->cutoff_is_set[i] = msa->cutoff_is_set[i];
-  }
-
-  /* sqlen is only needed while parsing; release it and clear the pointer
-   * so nothing later can reuse or re-free the stale address.
-   */
-  free(new->sqlen);
-  new->sqlen = NULL;
-
-  MSAMingap(new);
-  *ret_new = new;
-  return;
-}
-
-
-/*****************************************************************
- * Retrieval routines
- *
- * Access to MSA structure data is possible through these routines.
- * I'm not doing this because of object oriented design, though
- * it might work in my favor someday.
- * I'm doing this because lots of MSA data is optional, and
- * checking through the chain of possible NULLs is a pain.
- *****************************************************************/
-
-/* Accession of sequence idx, or NULL when none is stored. */
-char *
-MSAGetSeqAccession(MSA *msa, int idx)
-{
-  if (msa->sqacc == NULL) return NULL;
-  return msa->sqacc[idx];       /* may itself be NULL */
-}
-/* Description line of sequence idx, or NULL when none is stored. */
-char *
-MSAGetSeqDescription(MSA *msa, int idx)
-{
-  if (msa->sqdesc == NULL) return NULL;
-  return msa->sqdesc[idx];      /* may itself be NULL */
-}
-/* Secondary structure annotation of sequence idx, or NULL when none is stored. */
-char *
-MSAGetSeqSS(MSA *msa, int idx)
-{
-  if (msa->ss == NULL) return NULL;
-  return msa->ss[idx];          /* may itself be NULL */
-}
-/* Surface accessibility annotation of sequence idx, or NULL when none is stored. */
-char *
-MSAGetSeqSA(MSA *msa, int idx)
-{
-  if (msa->sa == NULL) return NULL;
-  return msa->sa[idx];          /* may itself be NULL */
-}
-
-
-/*****************************************************************
- * Information routines
- *
- * Access information about the MSA.
- *****************************************************************/
-
-/* Function: MSAAverageSequenceLength()
- * Date:     SRE, Sat Apr  6 09:41:34 2002 [St. Louis]
- *
- * Purpose:  Compute the mean of DealignedLength() over all the
- *           aligned sequences in the MSA, i.e. the average length
- *           of the (unaligned) sequences.
- *
- * Args:     msa - the alignment
- *
- * Returns:  average length; 0 for an alignment with no sequences.
- */
-float
-MSAAverageSequenceLength(MSA *msa)
-{
-  float total;                  /* summed dealigned lengths */
-  int   seq;                    /* sequence index */
-
-  /* avoid dividing by zero on an empty alignment */
-  if (msa->nseq == 0) return 0.;
-
-  total = 0.;
-  for (seq = 0; seq < msa->nseq; seq++)
-    total += (float) DealignedLength(msa->aseq[seq]);
-
-  return (total / msa->nseq);
-}
-
-
diff --git a/squid/msa.h b/squid/msa.h
deleted file mode 100644
index 36cf409..0000000
--- a/squid/msa.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQUID_MSA_INCLUDED
-#define SQUID_MSA_INCLUDED
-
-/* msa.h
- * SRE, Mon May 17 10:24:30 1999
- *
- * Header file for SQUID's multiple sequence alignment
- * manipulation code.
- *
- * RCS $Id: msa.h,v 1.13 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-#include <stdio.h> /* FILE support */
-#include "gki.h" /* hash table support */
-#include "ssi.h" /* sequence file index support */
-#include "squid.h" /* need SQINFO */
-
-/****************************************************
- * Obsolete alignment information, AINFO
- * Superseded by MSA structure further below; but we
- * need AINFO for the near future for backwards
- * compatibility.
- ****************************************************/
-/* Structure: aliinfo_s
- *
- * Purpose: Optional information returned from an alignment file.
- *
- * flags: always used. Flags for which info is valid/alloced.
- * The bits defined below (AINFO_TC, AINFO_NC, AINFO_GA) mark
- * which pairs of score cutoffs have been set.
- *
- * alen: mandatory. Alignments are always flushed right
- * with gaps so that all aseqs are the same length, alen.
- * Available for all alignment formats.
- *
- * nseq: mandatory. Aligned seqs are indexed 0..nseq-1.
- *
- * wgt: 0..nseq-1 vector of sequence weights. Mandatory.
- * If not explicitly set, weights are initialized to 1.0.
- *
- * cs: 0..alen-1, just like the alignment. Contains single-letter
- * secondary structure codes for consensus structure; "<>^+"
- * for RNA, "EHL." for protein. May be NULL if unavailable
- * from seqfile. Only available for SELEX format files.
- *
- * rf: 0..alen-1, just like the alignment. rf is an arbitrary string
- * of characters, used for annotating columns. Blanks are
- * interpreted as non-canonical columns and anything else is
- * considered canonical. Only available from SELEX files.
- *
- * sqinfo: mandatory. Array of 0..nseq-1
- * per-sequence information structures, carrying
- * name, id, accession, coords.
- *
- * name, desc, acc, au: alignment-level annotation strings.
- * MSAFromAINFO() copies them into the MSA with sre_strdup().
- *
- * tc1/tc2, nc1/nc2, ga1/ga2: score cutoff pairs. MSAFromAINFO()
- * only copies a pair into the MSA when the matching
- * AINFO_TC / AINFO_NC / AINFO_GA bit is set in flags.
- *
- */
-struct aliinfo_s {
- int flags; /* flags for what info is valid */
- int alen; /* length of alignment (columns) */
- int nseq; /* number of seqs in alignment */
- float *wgt; /* sequence weights [0..nseq-1] */
- char *cs; /* consensus secondary structure string */
- char *rf; /* reference coordinate system */
- struct seqinfo_s *sqinfo; /* name, id, coord info for each sequence */
-
- /* Pfam/HMMER pick-ups */
- char *name; /* name of alignment */
- char *desc; /* description of alignment */
- char *acc; /* accession of alignment */
- char *au; /* "author" information */
- float tc1, tc2; /* trusted score cutoffs (per-seq, per-domain) */
- float nc1, nc2; /* noise score cutoffs (per-seq, per-domain) */
- float ga1, ga2; /* gathering cutoffs */
-};
-typedef struct aliinfo_s AINFO;
-#define AINFO_TC (1 << 0) /* flags bit: tc1, tc2 are set */
-#define AINFO_NC (1 << 1) /* flags bit: nc1, nc2 are set */
-#define AINFO_GA (1 << 2) /* flags bit: ga1, ga2 are set */
-
-/*****************************************************************
- * MSA
- * SRE, Sun Jun 27 15:03:35 1999 [TW 723 over Greenland]
- *
- * Defines the new data structure and API for multiple
- * sequence alignment i/o.
- *****************************************************************/
-
-/* The following constants define the Pfam/Rfam cutoff set we'll propagate
- * from msa's into HMMER and Infernal models.
- */
-#define MSA_CUTOFF_TC1 0
-#define MSA_CUTOFF_TC2 1
-#define MSA_CUTOFF_GA1 2
-#define MSA_CUTOFF_GA2 3
-#define MSA_CUTOFF_NC1 4
-#define MSA_CUTOFF_NC2 5
-#define MSA_MAXCUTOFFS 6
-
-/* Structure: MSA
- * SRE, Tue May 18 11:33:08 1999
- *
- * Our object for a multiple sequence alignment.
- *
- * Most optional members are NULL when absent; the code that walks
- * them (e.g. MSAShorterAlignment(), the MSAGetSeq*() accessors)
- * checks both the array pointer and the per-sequence entry.
- */
-typedef struct msa_struct {
- /* Mandatory information associated with the alignment.
- */
- char **aseq; /* the alignment itself, [0..nseq-1][0..alen-1] */
- char **sqname; /* names of sequences, [0..nseq-1]; each a NUL-terminated string */
- float *wgt; /* sequence weights [0..nseq-1] */
- int alen; /* length of alignment (columns) */
- int nseq; /* number of seqs in alignment */
-
- /* Optional information that we understand, and might have.
- */
- int flags; /* flags for what optional info is valid */
- int type; /* kOtherSeq, kRNA/hmmNUCLEIC, or kAmino/hmmAMINO */
- char *name; /* name of alignment, or NULL */
- char *desc; /* description of alignment, or NULL */
- char *acc; /* accession of alignment, or NULL */
- char *au; /* "author" information, or NULL */
- char *ss_cons; /* consensus secondary structure string, or NULL */
- char *sa_cons; /* consensus surface accessibility string, or NULL */
- char *rf; /* reference coordinate system, or NULL */
- char **sqacc; /* accession numbers for individual sequences, or NULL */
- char **sqdesc; /* description lines for individual sequences, or NULL */
- char **ss; /* per-seq secondary structure annotation, or NULL */
- char **sa; /* per-seq surface accessibility annotation, or NULL */
- float cutoff[MSA_MAXCUTOFFS]; /* NC, TC, GA cutoffs propagated to Pfam/Rfam; indexed by MSA_CUTOFF_* */
- int cutoff_is_set[MSA_MAXCUTOFFS];/* TRUE if a cutoff is set; else FALSE */
-
- /* Optional information that we don't understand.
- * That is, we know what type of information it is, but it's
- * either (interpreted as) free-text comment, or it's Stockholm
- * markup with unfamiliar tags.
- */
- char **comment; /* free text comments, or NULL */
- int ncomment; /* number of comment lines */
- int alloc_ncomment; /* number of comment lines alloc'ed */
-
- char **gf_tag; /* markup tags for unparsed #=GF lines */
- char **gf; /* annotations for unparsed #=GF lines */
- int ngf; /* number of unparsed #=GF lines */
- int alloc_ngf; /* number of gf lines alloc'ed */
-
- char **gs_tag; /* markup tags for unparsed #=GS lines */
- char ***gs; /* [0..ngs-1][0..nseq-1][free text] markup */
- GKI *gs_idx; /* hash of #=GS tag types */
- int ngs; /* number of #=GS tag types */
-
- char **gc_tag; /* markup tags for unparsed #=GC lines */
- char **gc; /* [0..ngc-1][0..alen-1] markup */
- GKI *gc_idx; /* hash of #=GC tag types */
- int ngc; /* number of #=GC tag types */
-
- char **gr_tag; /* markup tags for unparsed #=GR lines */
- char ***gr; /* [0..ngr-1][0..nseq-1][0..alen-1] markup; a per-seq entry may be NULL */
- GKI *gr_idx; /* hash of #=GR tag types */
- int ngr; /* number of #=GR tag types */
-
- /* Stuff we need for our own maintenance of the data structure
- */
- GKI *index; /* name ->seqidx hash table */
- int nseqalloc; /* number of seqs currently allocated for */
- int nseqlump; /* lump size for dynamic expansions of nseq */
- int *sqlen; /* individual sequence lengths during parsing */
- int *sslen; /* individual ss lengths during parsing */
- int *salen; /* individual sa lengths during parsing */
- int lastidx; /* last index we saw; use for guessing next */
-} MSA;
-#define MSA_SET_WGT (1 << 0) /* track whether wgts were set, or left at default 1.0 */
-
-
-/* Structure: MSAFILE
- * SRE, Tue May 18 11:36:54 1999
- *
- * Defines an alignment file that's open for reading.
- *
- * The format readers take one of these rather than a bare FILE *;
- * fname and linenumber are what they put into their Die() messages
- * when a parse fails.
- */
-typedef struct msafile_struct {
- FILE *f; /* open file pointer */
- char *fname; /* name of file. used for diagnostic output */
- int linenumber; /* what line are we on in the file */
-
- char *buf; /* buffer for line input w/ sre_fgets() */
- int buflen; /* current allocated length for buf */
-
- SSIFILE *ssi; /* open SSI index file; or NULL, if none. */
-
- int do_gzip; /* TRUE if f is a pipe from gzip -dc (need pclose(f)) */
- int do_stdin; /* TRUE if f is stdin (don't close f, not our problem) */
- int format; /* format of alignment file we're reading; an MSAFILE_* code */
-} MSAFILE;
-
-
-/* Alignment file formats.
- * Must coexist with sqio.c/squid.h unaligned file format codes.
- * Rules:
- * - 0 is an unknown/unassigned format
- * - <100 reserved for unaligned formats
- * - >100 reserved for aligned formats
- */
-#define MSAFILE_UNKNOWN 0 /* unknown format */
-#define MSAFILE_STOCKHOLM 101 /* Pfam/HMMER's Stockholm format */
-#define MSAFILE_SELEX 102 /* Obsolete(!): old HMMER/SELEX format */
-#define MSAFILE_MSF 103 /* GCG MSF format */
-#define MSAFILE_CLUSTAL 104 /* Clustal V/W format */
-#define MSAFILE_A2M 105 /* aligned FASTA (A2M is UCSC terminology) */
-#define MSAFILE_PHYLIP 106 /* Felsenstein's PHYLIP format */
-#define MSAFILE_EPS 107 /* Encapsulated PostScript (output only) */
-
-#define IsAlignmentFormat(fmt) ((fmt) > 100)
-
-
-/* from msa.c
- */
-extern MSAFILE *MSAFileOpen(char *filename, int format, char *env);
-extern MSA *MSAFileRead(MSAFILE *afp);
-extern void MSAFileClose(MSAFILE *afp);
-extern void MSAFree(MSA *msa);
-extern void MSAFileWrite(FILE *fp, MSA *msa, int outfmt, int do_oneline);
-
-extern int MSAFileRewind(MSAFILE *afp);
-extern int MSAFilePositionByKey(MSAFILE *afp, char *key);
-extern int MSAFilePositionByIndex(MSAFILE *afp, int idx);
-
-extern int MSAFileFormat(MSAFILE *afp);
-extern MSA *MSAAlloc(int nseq, int alen);
-extern void MSAExpand(MSA *msa);
-extern char *MSAFileGetLine(MSAFILE *afp);
-extern void MSASetSeqAccession(MSA *msa, int seqidx, char *acc);
-extern void MSASetSeqDescription(MSA *msa, int seqidx, char *desc);
-extern void MSAAddComment(MSA *msa, char *s);
-extern void MSAAddGF(MSA *msa, char *tag, char *value);
-extern void MSAAddGS(MSA *msa, char *tag, int seqidx, char *value);
-extern void MSAAppendGC(MSA *msa, char *tag, char *value);
-extern char *MSAGetGC(MSA *msa, char *tag);
-extern void MSAAppendGR(MSA *msa, char *tag, int seqidx, char *value);
-extern void MSAVerifyParse(MSA *msa);
-extern int MSAGetSeqidx(MSA *msa, char *name, int guess);
-
-extern MSA *MSAFromAINFO(char **aseq, AINFO *ainfo);
-
-extern void MSAMingap(MSA *msa);
-extern void MSANogap(MSA *msa);
-extern void MSAShorterAlignment(MSA *msa, int *useme);
-extern void MSASmallerAlignment(MSA *msa, int *useme, MSA **ret_new);
-
-extern char *MSAGetSeqAccession(MSA *msa, int idx);
-extern char *MSAGetSeqDescription(MSA *msa, int idx);
-extern char *MSAGetSeqSS(MSA *msa, int idx);
-extern char *MSAGetSeqSA(MSA *msa, int idx);
-
-extern float MSAAverageSequenceLength(MSA *msa);
-
-/* from a2m.c
- */
-extern MSA *ReadA2M(MSAFILE *afp);
-extern void WriteA2M(FILE *fp, MSA *msa);
-
-/* from clustal.c
- */
-extern MSA *ReadClustal(MSAFILE *afp);
-extern void WriteClustal(FILE *fp, MSA *msa);
-
-/* from eps.c
- */
-extern void EPSWriteSmallMSA(FILE *fp, MSA *msa);
-
-/* from msf.c
- */
-extern MSA *ReadMSF(MSAFILE *afp);
-extern void WriteMSF(FILE *fp, MSA *msa);
-
-/* from phylip.c
- */
-extern MSA *ReadPhylip(MSAFILE *afp);
-extern void WritePhylip(FILE *fp, MSA *msa);
-
-/* from selex.c
- */
-extern MSA *ReadSELEX(MSAFILE *afp);
-extern void WriteSELEX(FILE *fp, MSA *msa);
-extern void WriteSELEXOneBlock(FILE *fp, MSA *msa);
-
-/* from stockholm.c
- */
-extern MSA *ReadStockholm(MSAFILE *afp);
-extern void WriteStockholm(FILE *fp, MSA *msa);
-extern void WriteStockholmOneBlock(FILE *fp, MSA *msa);
-
-#endif /*SQUID_MSA_INCLUDED*/
diff --git a/squid/msf.c b/squid/msf.c
deleted file mode 100644
index f0ac14e..0000000
--- a/squid/msf.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* msf.c
- * SRE, Sun Jul 11 16:17:32 1993
- *
- * Import/export of GCG MSF multiple sequence alignment
- * formatted files. Designed using format specifications
- * kindly provided by Steve Smith of Genetics Computer Group.
- *
- * CVS $Id: msf.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <time.h>
-#include "squid.h"
-#include "msa.h"
-
-#ifdef TESTDRIVE_MSF
-/*****************************************************************
- * msf.c test driver:
- * cc -DTESTDRIVE_MSF -g -O2 -Wall -o test msf.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c sqio.c alignio.c selex.c interleaved.c types.c -lm
- *
- */
-/* Test driver: read every alignment ReadMSF() finds in the file named
- * by the first command line argument and echo it with WriteMSF().
- */
-int
-main(int argc, char **argv)
-{
-  MSAFILE *afp;
-  MSA     *msa;
-  char    *file;
-
-  /* argv[1] is not valid unless a file name was actually given */
-  if (argc < 2)
-    Die("Usage: test <MSF alignment file>\n");
-  file = argv[1];
-
-  /* this driver exercises the MSF reader, so tag the file as MSF */
-  if ((afp = MSAFileOpen(file, MSAFILE_MSF, NULL)) == NULL)
-    Die("Couldn't open %s\n", file);
-
-  while ((msa = ReadMSF(afp)) != NULL)
-    {
-      WriteMSF(stdout, msa);
-      MSAFree(msa);
-    }
-
-  MSAFileClose(afp);
-  exit(0);
-}
-/******************************************************************/
-#endif /* testdrive_msf */
-
-
-
-/* Function: ReadMSF()
- * Date:     SRE, Tue Jun  1 08:07:22 1999 [St. Louis]
- *
- * Purpose:  Parse an alignment read from an open MSF format
- *           alignment file. (MSF is a single-alignment format.)
- *           Return the alignment, or NULL if we've already
- *           read the alignment.
- *
- *           The file is consumed in four stages: an optional "!!"
- *           type header; free-text comments up to the
- *           "MSF: .. Type: .. Check: .." line; the Name:/Weight:
- *           list up to "//"; and the interleaved sequence blocks.
- *
- * Args:     afp  - open alignment file
- *
- * Returns:  MSA * - an alignment object
- *                   caller responsible for an MSAFree()
- *           NULL if no more alignments
- *
- * Diagnostics:
- *           Will Die() here with a (potentially) useful message
- *           if a parsing error occurs.
- */
-MSA *
-ReadMSF(MSAFILE *afp)
-{
-  MSA  *msa;
-  char *s;
-  int   alleged_type;
-  char *tok;
-  char *sp;
-  int   slen;
-  int   sqidx;
-  char *name;
-  char *seq;
-
-  if (feof(afp->f)) return NULL;
-  if ((s = MSAFileGetLine(afp)) == NULL) return NULL;
-
-  /* The first line is the header.
-   * This is a new-ish GCG feature. Don't count on it, so
-   * we can be a bit more tolerant towards non-GCG software
-   * generating "MSF" files.
-   * From here on msa is allocated, so it is freed on every early return.
-   */
-  msa = MSAAlloc(10, 0);
-  if (strncmp(s, "!!AA_MULTIPLE_ALIGNMENT", 23) == 0) {
-    msa->type = kAmino;
-    if ((s = MSAFileGetLine(afp)) == NULL) { MSAFree(msa); return NULL; }
-  } else if (strncmp(s, "!!NA_MULTIPLE_ALIGNMENT", 23) == 0) {
-    msa->type = kRNA;
-    if ((s = MSAFileGetLine(afp)) == NULL) { MSAFree(msa); return NULL; }
-  }
-
-  /* Now we're in the free text comment section of the MSF file.
-   * It ends when we see the "MSF: Type: Check: .." line.
-   * This line must be present.
-   */
-  do
-    {
-      if ((strstr(s, "..") != NULL && strstr(s, "MSF:") != NULL) &&
-          Strparse("^.+MSF: +([0-9]+) +Type: +([PNX]).+Check: +([0-9]+) +\\.\\.", s, 3))
-        {
-          /* Three parenthesized fields are requested: length, type,
-           * checksum. The type is the second one. (The length and
-           * checksum are matched but not otherwise used here.)
-           * NOTE(review): the previous code read the type from
-           * sqd_parse[1] while reading the checksum from sqd_parse[3];
-           * with fields numbered 1..3 the type is sqd_parse[2].
-           */
-          switch (*(sqd_parse[2])) {
-          case 'N' : alleged_type = kRNA;      break;
-          case 'P' : alleged_type = kAmino;    break;
-          case 'X' : alleged_type = kOtherSeq; break;
-          default  : alleged_type = kOtherSeq;
-          }
-          /* an explicit "!!" header, if seen, takes precedence */
-          if (msa->type == kOtherSeq) msa->type = alleged_type;
-          break;                /* we're done with comment section. */
-        }
-      if (! IsBlankline(s))
-        MSAAddComment(msa, s);
-    } while ((s = MSAFileGetLine(afp)) != NULL);
-
-  /* Now we're in the name section.
-   * GCG has a relatively poorly documented feature: only sequences that
-   * appear in this list will be read from the alignment section. Commenting
-   * out sequences in the name list (by preceding them with "!") is
-   * allowed as a means of manually defining subsets of sequences in
-   * the alignment section. We can support this feature reasonably
-   * easily because of the hash table for names in the MSA: we
-   * only add names to the hash table when we see 'em in the name section.
-   */
-  while ((s = MSAFileGetLine(afp)) != NULL)
-    {
-      while (*s == ' ' || *s == '\t') s++; /* skip leading whitespace */
-
-      if (*s == '\n' || *s == '\0') continue; /* skip blank lines */
-      else if (*s == '!') MSAAddComment(msa, s);
-      else if ((sp = strstr(s, "Name:")) != NULL)
-        {
-          /* We take the name and the weight, and that's it */
-          sp += 5;
-          tok = sre_strtok(&sp, " \t", &slen); /* <sequence name> */
-          if (tok == NULL)
-            Die("No sequence name after Name: on line %d in name section of MSF file %s\n",
-                afp->linenumber, afp->fname);
-          sqidx = GKIStoreKey(msa->index, tok);
-          if (sqidx >= msa->nseqalloc) MSAExpand(msa);
-          msa->sqname[sqidx] = sre_strdup(tok, slen);
-          msa->nseq++;
-
-          if ((sp = strstr(sp, "Weight:")) == NULL)
-            Die("No Weight: on line %d for %s in name section of MSF file %s\n",
-                afp->linenumber, msa->sqname[sqidx], afp->fname);
-          sp += 7;
-          tok = sre_strtok(&sp, " \t", &slen);
-          if (tok == NULL)
-            Die("No value after Weight: on line %d for %s in name section of MSF file %s\n",
-                afp->linenumber, msa->sqname[sqidx], afp->fname);
-          msa->wgt[sqidx] = atof(tok);
-          msa->flags |= MSA_SET_WGT;
-        }
-      else if (strncmp(s, "//", 2) == 0)
-        break;
-      else
-        {
-          Die("Invalid line (probably %d) in name section of MSF file %s:\n%s\n",
-              afp->linenumber, afp->fname, s);
-          squid_errno = SQERR_FORMAT; /* NOT THREADSAFE */
-          MSAFree(msa);
-          return NULL;
-        }
-
-    }
-
-  /* And now we're in the sequence section.
-   * As discussed above, if we haven't seen a sequence name, then we
-   * don't include the sequence in the alignment.
-   * Also, watch out for coordinate-only lines.
-   */
-  while ((s = MSAFileGetLine(afp)) != NULL)
-    {
-      sp = s;
-      if ((name = sre_strtok(&sp, " \t", NULL)) == NULL) continue;
-      if ((seq = sre_strtok(&sp, "\n", &slen)) == NULL) continue;
-
-      /* The test for a coord line: digits starting both fields.
-       * <ctype.h> functions need an unsigned char value.
-       */
-      if (isdigit((unsigned char) *name) && isdigit((unsigned char) *seq))
-        continue;
-
-      /* It's not blank, and it's not a coord line: must be sequence
-       */
-      sqidx = GKIKeyIndex(msa->index, name);
-      if (sqidx < 0) continue;  /* not a sequence we recognize */
-
-      msa->sqlen[sqidx] = sre_strcat(&(msa->aseq[sqidx]), msa->sqlen[sqidx], seq, slen);
-    }
-
-  /* We've left blanks in the aseqs; take them back out.
-   */
-  for (sqidx = 0; sqidx < msa->nseq; sqidx++)
-    {
-      if (msa->aseq[sqidx] == NULL)
-        Die("Didn't find a sequence for %s in MSF file %s\n", msa->sqname[sqidx], afp->fname);
-
-      /* s reads, sp writes; sqlen shrinks by one per blank removed */
-      for (s = sp = msa->aseq[sqidx]; *s != '\0'; s++)
-        {
-          if (*s == ' ' || *s == '\t') {
-            msa->sqlen[sqidx]--;
-          } else {
-            *sp = *s;
-            sp++;
-          }
-        }
-      *sp = '\0';
-    }
-
-  MSAVerifyParse(msa);          /* verifies, and also sets alen and wgt. */
-  return msa;
-}
-
-
-/* Function: WriteMSF()
- * Date:     SRE, Mon May 31 11:25:18 1999 [St. Louis]
- *
- * Purpose:  Write an alignment in MSF format to an open file.
- *
- * Args:     fp  - file that's open for writing.
- *           msa - alignment to write.
- *
- *                 If msa->type is still kOtherSeq, it is guessed
- *                 with GuessAlignmentSeqtype() and stored back in
- *                 msa->type; if the type still cannot be told, or
- *                 is invalid, a fatal error is generated.
- *
- * Returns:  (void)
- */
-void
-WriteMSF(FILE *fp, MSA *msa)
-{
-  time_t now;                   /* current time as a time_t */
-  char   date[64];              /* today's date in GCG's format "October 3, 1996 15:57" */
-  char **gcg_aseq;              /* aligned sequences with gaps converted to GCG format */
-  char **gcg_sqname;            /* sequence names with GCG-valid character sets */
-  int    idx;                   /* counter for sequences */
-  char  *s;                     /* pointer into sqname or seq */
-  int    len;                   /* tmp variable for name lengths */
-  int    namelen;               /* maximum name length used */
-  int    pos;                   /* position counter */
-  char   buffer[51];            /* buffer for writing seq */
-  int    i;                     /* another position counter */
-
-  /*****************************************************************
-   * Make copies of sequence names and sequences.
-   *   GCG recommends that name characters should only contain
-   *   alphanumeric characters, -, or _
-   *   Some GCG and GCG-compatible software is sensitive to this.
-   *   We silently convert all other characters to '_'.
-   *
-   *   For sequences, GCG allows only ~ and . for gaps.
-   *   Otherwise, everything is interpreted as a residue;
-   *   so squid's IUPAC-restricted chars are fine. ~ means
-   *   an external gap. . means an internal gap.
-   *****************************************************************/
-
-  /* make copies that we can edit */
-  gcg_aseq   = MallocOrDie(sizeof(char *) * msa->nseq);
-  gcg_sqname = MallocOrDie(sizeof(char *) * msa->nseq);
-  for (idx = 0; idx < msa->nseq; idx++)
-    {
-      gcg_aseq[idx]   = sre_strdup(msa->aseq[idx],   msa->alen);
-      gcg_sqname[idx] = sre_strdup(msa->sqname[idx], -1);
-    }
-  /* alter names as needed; <ctype.h> functions need an unsigned char value */
-  for (idx = 0; idx < msa->nseq; idx++)
-    for (s = gcg_sqname[idx]; *s != '\0'; s++)
-      if (! isalnum((unsigned char) *s) && *s != '-' && *s != '_')
-        *s = '_';
-  /* alter gap chars in seq */
-  for (idx = 0; idx < msa->nseq; idx++)
-    {
-      /* leading run of gaps is external */
-      for (s = gcg_aseq[idx]; *s != '\0' && isgap(*s); s++)
-        *s = '~';
-      /* everything after that is provisionally internal... */
-      for (; *s != '\0'; s++)
-        if (isgap(*s)) *s = '.';
-      /* ...except the trailing run, which is external again */
-      for (pos = msa->alen-1; pos > 0 && isgap(gcg_aseq[idx][pos]); pos--)
-        gcg_aseq[idx][pos] = '~';
-    }
-  /* calculate max namelen used */
-  namelen = 0;
-  for (idx = 0; idx < msa->nseq; idx++)
-    if ((len = strlen(msa->sqname[idx])) > namelen)
-      namelen = len;
-
-  /*****************************************************
-   * Write the MSF header
-   *****************************************************/
-  /* required file type line */
-  if (msa->type == kOtherSeq)
-    msa->type = GuessAlignmentSeqtype(msa->aseq, msa->nseq);
-
-  if      (msa->type == kRNA)   fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n");
-  else if (msa->type == kDNA)   fprintf(fp, "!!NA_MULTIPLE_ALIGNMENT 1.0\n");
-  else if (msa->type == kAmino) fprintf(fp, "!!AA_MULTIPLE_ALIGNMENT 1.0\n");
-  else if (msa->type == kOtherSeq)
-    Die("WriteMSF(): couldn't guess whether that alignment is RNA or protein.\n");
-  else
-    Die("Invalid sequence type %d in WriteMSF()\n", msa->type);
-
-  /* free text comments */
-  if (msa->ncomment > 0)
-    {
-      for (idx = 0; idx < msa->ncomment; idx++)
-        fprintf(fp, "%s\n", msa->comment[idx]);
-      fprintf(fp, "\n");
-    }
-  /* required checksum line */
-  now = time(NULL);
-  if (strftime(date, 64, "%B %d, %Y %H:%M", localtime(&now)) == 0)
-    Die("What time is it on earth? strftime() failed in WriteMSF().\n");
-  /* The Type: letter must agree with the "!!" line above, which treats
-   * both kRNA and kDNA as nucleic acid.
-   */
-  fprintf(fp, " %s MSF: %d Type: %c %s Check: %d ..\n",
-          msa->name != NULL ? msa->name : "squid.msf",
-          msa->alen,
-          (msa->type == kRNA || msa->type == kDNA) ? 'N' : 'P',
-          date,
-          GCGMultchecksum(gcg_aseq, msa->nseq));
-  fprintf(fp, "\n");
-
-  /*****************************************************
-   * Names/weights section
-   *****************************************************/
-
-  for (idx = 0; idx < msa->nseq; idx++)
-    {
-      fprintf(fp, " Name: %-*.*s Len: %5d Check: %4d Weight: %.2f\n",
-              namelen, namelen,
-              gcg_sqname[idx],
-              msa->alen,
-              GCGchecksum(gcg_aseq[idx], msa->alen),
-              msa->wgt[idx]);
-    }
-  fprintf(fp, "\n");
-  fprintf(fp, "//\n");
-
-  /*****************************************************
-   * Write the sequences
-   *****************************************************/
-
-  for (pos = 0; pos < msa->alen; pos += 50)
-    {
-      fprintf(fp, "\n");        /* Blank line between sequence blocks */
-
-      /* Coordinate line */
-      len = (pos + 50) > msa->alen ? msa->alen - pos : 50;
-      if (len > 10)
-        fprintf(fp, "%*s %-6d%*s%6d\n", namelen, "",
-                pos+1,
-                len + ((len-1)/10) - 12, "",
-                pos + len);
-      else
-        fprintf(fp, "%*s %-6d\n", namelen, "", pos+1);
-
-      for (idx = 0; idx < msa->nseq; idx++)
-        {
-          fprintf(fp, "%-*s ", namelen, gcg_sqname[idx]);
-          /* get next line's worth of 50 from seq */
-          strncpy(buffer, gcg_aseq[idx] + pos, 50);
-          buffer[50] = '\0';
-          /* draw the sequence line, in groups of ten */
-          for (i = 0; i < len; i++)
-            {
-              if (! (i % 10)) fputc(' ', fp);
-              fputc(buffer[i], fp);
-            }
-          fputc('\n', fp);
-        }
-    }
-
-  Free2DArray((void **) gcg_aseq,   msa->nseq);
-  Free2DArray((void **) gcg_sqname, msa->nseq);
-  return;
-}
-
-
-
diff --git a/squid/phylip.c b/squid/phylip.c
deleted file mode 100644
index a116b2b..0000000
--- a/squid/phylip.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* phylip.c
- * SRE, Mon Jun 14 14:08:33 1999 [St. Louis]
- *
- * Import/export of PHYLIP interleaved multiple sequence alignment
- * format files.
- *
- * CVS $Id: phylip.c,v 1.3 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-#include "msa.h"
-
-#ifdef TESTDRIVE_PHYLIP
-/*****************************************************************
- * phylip.c test driver:
- *
- */
-int
-main(int argc, char **argv)
-{
- MSAFILE *afp;
- MSA *msa;
- char *file;
-
- file = argv[1];
-
- if ((afp = MSAFileOpen(file, MSAFILE_UNKNOWN, NULL)) == NULL)
- Die("Couldn't open %s\n", file);
-
- printf("format %d\n", afp->format);
-
- while ((msa = ReadPhylip(afp)) != NULL)
- {
- WritePhylip(stdout, msa);
- MSAFree(msa);
- }
-
- MSAFileClose(afp);
- exit(0);
-}
-/******************************************************************/
-#endif /* testdrive_phylip */
-
-
-
-/* Function: ReadPhylip()
- * Date: SRE, Fri Jun 18 12:59:37 1999 [Sanger Centre]
- *
- * Purpose: Parse an alignment from an open Phylip format
- * alignment file. Phylip is a single-alignment format.
- * Return the alignment, or NULL if we have no data.
- *
- * Args: afp - open alignment file
- *
- * Returns: MSA * - an alignment object
- * Caller responsible for an MSAFree()
- * NULL if no more alignments
- */
-MSA *
-ReadPhylip(MSAFILE *afp)
-{
- MSA *msa;
- char *s, *s1, *s2;
- char name[11]; /* seq name max len = 10 char */
- int nseq, alen;
- int idx; /* index of current sequence */
- int slen;
- int nblock;
-
- if (feof(afp->f)) return NULL;
-
- /* Skip until we see a nonblank line; it's the header,
- * containing nseq/alen
- */
- nseq = 0; alen = 0;
- while ((s = MSAFileGetLine(afp)) != NULL)
- {
- if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue;
- if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL)
- Die("Failed to parse nseq/alen from first line of PHYLIP file %s\n", afp->fname);
- if (! IsInt(s1) || ! IsInt(s2))
- Die("nseq and/or alen not an integer in first line of PHYLIP file %s\n", afp->fname);
- nseq = atoi(s1);
- alen = atoi(s2);
- break;
- }
-
- msa = MSAAlloc(nseq, 0);
- idx = 0;
- nblock = 0;
- while ((s = MSAFileGetLine(afp)) != NULL)
- {
- /* ignore blank lines. nonblank lines start w/ nonblank char */
- if (isspace((int) *s)) continue;
- /* First block has seq names */
- if (nblock == 0) {
- strncpy(name, s, 10);
- name[10] = '\0';
- GKIStoreKey(msa->index, name);
- msa->sqname[idx] = sre_strdup(name, -1);
- s += 10;
- }
- /* be careful of trailing whitespace on lines */
- if ((s1 = sre_strtok(&s, WHITESPACE, &slen)) == NULL)
- Die("Failed to parse sequence at line %d of PHYLIP file %s\n",
- afp->linenumber, afp->fname);
- msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], s1, slen);
-
- idx++;
- if (idx == nseq) { idx = 0; nblock++; }
- }
- msa->nseq = nseq;
- MSAVerifyParse(msa); /* verifies; sets alen, wgt; frees sqlen[] */
- return msa;
-}
-
-
-
-/* Function: WritePhylip()
- * Date: SRE, Fri Jun 18 12:07:41 1999 [Sanger Centre]
- *
- * Purpose: Write an alignment in Phylip format to an open file.
- *
- * Args: fp - file that's open for writing.
- * msa - alignment to write.
- *
- * Returns: (void)
- */
-void
-WritePhylip(FILE *fp, MSA *msa)
-{
- int idx; /* counter for sequences */
- int cpl = 50; /* 50 seq char per line */
- char buf[51]; /* buffer for writing seq */
- int pos;
-
- /* First line has nseq, alen
- */
- fprintf(fp, " %d %d\n", msa->nseq, msa->alen);
-
- /* Alignment section.
- * PHYLIP is a multiblock format, blocks (optionally) separated
- * by blanks; names only attached to first block. Names are
- * restricted to ten char; we achieve this by simple truncation (!).
- * (Do we need to convert gap characters from our ./- convention?)
- */
- for (pos = 0; pos < msa->alen; pos += cpl)
- {
- if (pos > 0) fprintf(fp, "\n");
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- strncpy(buf, msa->aseq[idx] + pos, cpl);
- buf[cpl] = '\0';
- if (pos > 0) fprintf(fp, "%s\n", buf);
- else fprintf(fp, "%-10.10s%s\n", msa->sqname[idx], buf);
- }
- }
- return;
-}
diff --git a/squid/revcomp.c b/squid/revcomp.c
deleted file mode 100644
index bbe833a..0000000
--- a/squid/revcomp.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* revcomp.c
- *
- * Reverse complement of a IUPAC character string
- * CVS $Id: revcomp.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-
-/* Function: revcomp()
- *
- * Purpose: Reverse complement seq; store in comp.
- * Can revcomp "in place" (revcomp(seq, seq)).
- *
- * Args: comp - destination for reverse complement of seq
- * seq - sequence to reverse complement
- *
- * Returns: NULL on failure; or a (useless) pointer to comp.
- */
-char *
-revcomp(char *comp, char *seq)
-{
- char *s;
- char c;
-
- if (comp == NULL) return NULL;
- if (seq == NULL) return NULL;
-
- StrReverse(comp, seq);
- for (s = comp; *s != '\0'; s++)
- {
- c = *s;
- c = sre_toupper(c);
- switch (c) {
- case 'A': c = 'T'; break;
- case 'C': c = 'G'; break;
- case 'G': c = 'C'; break;
- case 'T': c = 'A'; break;
- case 'U': c = 'A'; break;
- case 'R': c = 'Y'; break;
- case 'Y': c = 'R'; break;
- case 'M': c = 'K'; break;
- case 'K': c = 'M'; break;
- case 'S': c = 'S'; break;
- case 'W': c = 'W'; break;
- case 'H': c = 'D'; break;
- case 'D': c = 'H'; break;
- case 'B': c = 'V'; break;
- case 'V': c = 'B'; break;
- default: break; /* anything else? leave it; it's prob a gap or an X */
- }
- if (islower((int) *s)) c = (char) sre_tolower((int) c);
- *s = c;
- }
- return comp;
-}
-
-#ifdef REVCOMP_TESTDRIVER
-/* gcc -g -DREVCOMP_TESTDRIVER revcomp.c sre_string.c shuffle.c sre_math.c sre_ctype.c sqerror.c -lm
-*/
-int
-main(void)
-{
- float p[4] = {0.25, 0.25, 0.25, 0.25};
- char *alphabet = "ACGT";
- int len = 10;
- char *seq;
-
- seq = RandomSequence(alphabet, p, 4, len);
- printf("%s\n", seq);
- revcomp(seq, seq);
- printf("%s\n", seq);
- free(seq);
- exit(0);
-}
-#endif
diff --git a/squid/revcomp_main.c b/squid/revcomp_main.c
deleted file mode 100644
index 710f3de..0000000
--- a/squid/revcomp_main.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* main for revcomp
- *
- * revcomp - generate reverse complement of sequences
- * SRE, Thu Aug 5 17:36:57 1993
- * CVS $Id: revcomp_main.c,v 1.7 2003/10/04 18:26:49 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-
-static char banner[] = "revcomp - reverse complement a nucleic acid sequence";
-
-static char usage[] = "Usage: revcomp [-options] <seqfile>\n\
- Reverse complement a nucleic acid sequence.\n\
- Available options:\n\
- -h : help; print version and usage info\n\
-";
-
-static char experts[] = "\
-";
-
-static struct opt_s OPTIONS[] = {
- { "-h", TRUE, sqdARG_NONE },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-
-
-int
-main(int argc, char **argv)
-{
- char *seqfile; /* name of sequence file */
- SQFILE *dbfp; /* open sequence file */
- int fmt; /* format of seqfile */
- char *seq; /* sequence */
- SQINFO sqinfo; /* additional sequence info */
- char *rev; /* reverse complement */
- int swap;
-
- char *optname; /* name of option found by Getopt() */
- char *optarg; /* argument found by Getopt() */
- int optind; /* index in argv[] */
-
-
- /***********************************************
- * Parse command line
- ***********************************************/
-
- fmt = SQFILE_UNKNOWN;
-
- while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
- &optind, &optname, &optarg)) {
- if (strcmp(optname, "-h") == 0) {
- SqdBanner(stdout, banner);
- puts(usage);
- puts(experts);
- exit(EXIT_SUCCESS);
- }
- }
-
- if (argc - optind != 1) Die("%s\n", usage);
- seqfile = argv[optind];
-
- if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
- Die("Failed to open sequence file %s for reading", seqfile);
-
- while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
- {
- if ((rev = (char *) malloc ((sqinfo.len + 1) * sizeof(char))) == NULL)
- Die("malloc failed");
-
- revcomp(rev, seq);
- if (sqinfo.flags & (SQINFO_START | SQINFO_STOP))
- {
- swap = sqinfo.start;
- sqinfo.start = sqinfo.stop;
- sqinfo.stop = swap;
- }
- /* secondary structure of reverse strand is nonsense
- */
- if (sqinfo.flags & SQINFO_SS)
- {
- sqinfo.flags = sqinfo.flags & ~SQINFO_SS;
- free(sqinfo.ss);
- }
-
- WriteSeq(stdout, SQFILE_FASTA, rev, &sqinfo);
-
- free(rev);
- FreeSequence(seq, &sqinfo);
- }
-
- SeqfileClose(dbfp);
- return 0;
-}
diff --git a/squid/rk.c b/squid/rk.c
deleted file mode 100644
index 429127c..0000000
--- a/squid/rk.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* rk.c (originally from rnabob's patsearch.c)
- *
- * Contains a compiler and a search engine for Rabin-Karp
- * based primary sequence pattern searching on encoded
- * sequences.
- *
- * See Sedgewick, _Algorithms_, for a general discussion of
- * the Rabin-Karp algorithm. See the rkcomp or rkexec man
- * pages for specific details.
- *
- * CVS $Id: rk.c,v 1.3 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h" /* seq encoding utilities and typedefs */
-#include "rk.h"
-
-
-Hashseq
-rkcomp(char *probe) /* A,C,G,T/U, N probe string, 0-8 nt long */
-{
- Hashseq hashprobe = 0;
- char coded[RK_HASHSIZE + 1];
- int len;
- int i;
- /* check bounds violation on probe */
- if ((len = strlen(probe)) > RK_HASHSIZE) return 0;
- /* encode the probe */
- if (seqencode(coded, probe) == 0) return 0;
- /* pack the probe into a Hashseq */
- for (i = 0; i < len; i++)
- {
- hashprobe <<= 4;
- hashprobe |= (Hashseq) coded[i];
- }
- /* left adjust as needed */
- for (; i < RK_HASHSIZE; i++)
- {
- hashprobe <<= 4;
- hashprobe |= (Hashseq) NTN;
- }
- /* return the compiled probe */
- return hashprobe;
-}
-
-int
-rkseq(Hashseq hashprobe, /* up to 8 nt packed into the probe */
- char *sequence) /* encoded sequence */
-{
- long i;
- long pos = 0;
- Hashseq target = 0;
-
- /* initialize the target hashseq */
- for (i = 0; i < RK_HASHSIZE; i++)
- {
- if (*(sequence + i) == NTEND)
- break;
- target <<= 4;
- target |= (Hashseq) (*(sequence + i));
- }
-
- while (*(sequence + pos + RK_HASHSIZE -1) != NTEND)
- {
-#ifdef DEBUG
- printf("hashprobe: ");
- writehash(hashprobe);
- printf("\ttarget: ");
- writehash(target);
- printf("\nhashprobe & target: ");
- writehash(hashprobe & target);
- printf("\n");
-#endif
- if ((hashprobe & target) == target)
- return ((int) pos);
- target <<= 4;
- target |= (Hashseq) (*(sequence + pos + RK_HASHSIZE));
- pos++;
- }
- /* now we deal with an end effect */
- for (i = 0; i < RK_HASHSIZE; i++)
- {
- target |= (Hashseq) NTN;
- if ((hashprobe & target) == target)
- return ((int) pos);
- target <<=4;
- pos++;
- }
-
- return(-1);
-}
-
-
-#ifdef DEBUG /* Debugging aids */
-
-static void
-writehash(Hashseq hashseq)
-{
- int idx;
- int sym;
-
- if (hashseq/16)
- writehash(hashseq/16);
-
- sym = (int) (hashseq % 16);
- if (sym == 0)
- putchar('-');
- else
- {
- for (idx = 0; sym != iupac[idx].code && idx < IUPACSYMNUM; idx++);
- if (idx > IUPACSYMNUM)
- printf("(%d)", sym);
- else
- putchar(iupac[idx].sym);
- }
-}
-
-#endif
diff --git a/squid/rk.h b/squid/rk.h
deleted file mode 100644
index f6c1732..0000000
--- a/squid/rk.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQRKH_INCLUDED
-#define SQRKH_INCLUDED
-
-/* rk.h
- *
- * Header file for Rabin-Karp pattern searching on encoded
- * sequence strings.
- *
- * Sean Eddy, Thu Oct 1 11:45:42 1992
- * RCS $Id: rk.h,v 1.2 1998/10/09 18:07:16 eddy Exp $
- */
-
-
- /* expect 32 bits for 8 nt */
-typedef unsigned long Hashseq;
- /* but we count to be sure...
- RK_HASHSIZE is the number of nt that fit
- in one probe */
-#define RK_HASHSIZE (sizeof(Hashseq)*2)
- /* empirically, how many nt minimum we require
- in a pattern before we abandon rk and
- go with something else */
-#define RK_REQUIRE 4
-
-extern int rkseq(Hashseq hashprobe, char *sequence);
-extern Hashseq rkcomp(char *probe); /* compile a Hashseq from a pattern */
-
-
-
-#endif /* SQRKH_INCLUDED */
diff --git a/squid/selex.c b/squid/selex.c
deleted file mode 100644
index 2d5d0b7..0000000
--- a/squid/selex.c
+++ /dev/null
@@ -1,828 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* selex.c
- *
- * SRE, Mon Jun 14 11:08:38 1999
- * SELEX obsolete as the preferred HMMER/SQUID format
- * replaced by Stockholm format
- * selex support retained for backwards compatibility
- * kludged to use the MSA interface
- *
- * SRE, Mon Jan 30 14:41:49 1995:
- * #=SA side chain % surface accessibility annotation supported
- *
- * SRE, Tue Nov 9 17:40:50 1993:
- * major revision. #= special comments and aliinfo_s optional
- * alignment info support added. Support for #=CS (consensus
- * secondary structure), #=SS (individual secondary structure),
- * #=RF (reference coordinate system), #=SQ (per-sequence header info),
- * and #=AU ("author") added.
- *
- * Fri Dec 4 17:43:24 1992, SRE:
- * Reading and writing aligned sequences to/from disk files.
- * Implements a new, broader specification of SELEX format
- * and supercedes alignio.c.
- *
- * SELEX format is documented in Docs/formats.tex.
- ****************************************************************************
- * CVS $Id: selex.c,v 1.12 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include <memory.h>
-#include "squid.h"
-#include "msa.h"
-
-static int copy_alignment_line(char *aseq, int apos, int name_rcol,
- char *buffer, int lcol, int rcol, char gapsym);
-static void actually_write_selex(FILE *fp, MSA *msa, int cpl);
-
-static char commentsyms[] = "%#";
-
-/* Function: ReadSELEX()
- * Date: SRE, Sun Jun 6 18:24:09 1999 [St. Louis]
- *
- * Purpose: Parse an alignment read from an open SELEX format
- * alignment file. (SELEX is a single alignment format).
- * Return the alignment, or NULL if we've already read the
- * alignment or there's no alignment data in the file.
- *
- * Limitations: SELEX is the only remaining multipass parser for
- * alignment files. It cannot read from gzip or from stdin.
- * It Die()'s here if you try. The reason for this
- * that SELEX allows space characters as gaps, so we don't
- * know the borders of an alignment block until we've seen
- * the whole block. I could rewrite to allow single-pass
- * parsing (by storing the whole block in memory) but
- * since SELEX is now legacy, why bother.
- *
- * Note that the interface is totally kludged: fastest
- * possible adaptation of old ReadSELEX() to the new
- * MSA interface.
- *
- * Args: afp - open alignment file
- *
- * Returns: MSA * - an alignment object
- * caller responsible for an MSAFree()
- * NULL if no alignment data.
- */
-MSA *
-ReadSELEX(MSAFILE *afp)
-{
- MSA *msa; /* RETURN: mult seq alignment */
- FILE *fp; /* ptr to opened seqfile */
- char **aseqs; /* aligned seqs */
- int num = 0; /* number of seqs read */
- char buffer[LINEBUFLEN]; /* input buffer for lines */
- char bufcpy[LINEBUFLEN]; /* strtok'able copy of buffer */
- struct block_struc { /** alignment data for a block: */
- int lcol; /* furthest left aligned sym */
- int rcol; /* furthest right aligned sym */
- } *blocks = NULL;
- int blocknum; /* number of blocks in file */
- char *nptr; /* ptr to start of name on line */
- char *sptr; /* ptr into sequence on line */
- int currnum; /* num. seqs in given block */
- int currblock; /* index for blocks */
- int i; /* loop counter */
- int seqidx; /* counter for seqs */
- int alen; /* length of alignment */
- int warn_names; /* becomes TRUE if names don't match between blocks */
- int headnum; /* seqidx in per-sequence header info */
- int currlen;
- int count;
- int have_cs = 0;
- int have_rf = 0;
- AINFO base_ainfo, *ainfo; /* hack: used to be passed ptr to AINFO */
-
-
- /* Convert from MSA interface to what old ReadSELEX() did:
- * - copy our open fp, rather than opening file
- * - verify that we're not reading a gzip or stdin
- */
- if (feof(afp->f)) return NULL;
- if (afp->do_gzip || afp->do_stdin)
- Die("Can't read a SELEX format alignment from a pipe, stdin, or gzip'ed file");
- fp = afp->f;
- ainfo = &base_ainfo;
-
- /***************************************************
- * First pass across file.
- * Count seqs, get names, determine column info
- * Determine what sorts of info are active in this file.
- ***************************************************/
-
- InitAinfo(ainfo);
- /* get first line of the block
- * (non-comment, non-blank) */
- do
- {
- if (fgets(buffer, LINEBUFLEN, fp) == NULL)
- { squid_errno = SQERR_NODATA; return 0; }
- strcpy(bufcpy, buffer);
- if (*buffer == '#')
- {
- if (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1;
- else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1;
- }
- }
- while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL ||
- (strchr(commentsyms, *nptr) != NULL));
-
- blocknum = 0;
- warn_names = FALSE;
- while (!feof(fp))
- {
- /* allocate for info about this block. */
- if (blocknum == 0)
- blocks = (struct block_struc *) MallocOrDie (sizeof(struct block_struc));
- else
- blocks = (struct block_struc *) ReallocOrDie (blocks, (blocknum+1) * sizeof(struct block_struc));
- blocks[blocknum].lcol = LINEBUFLEN+1;
- blocks[blocknum].rcol = -1;
-
- currnum = 0;
- while (nptr != NULL) /* becomes NULL when this block ends. */
- {
- /* First block only: save names */
- if (blocknum == 0)
- {
- if (currnum == 0)
- ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO));
- else
- ainfo->sqinfo = (SQINFO *) ReallocOrDie (ainfo->sqinfo, (currnum + 1) * sizeof(SQINFO));
-
- ainfo->sqinfo[currnum].flags = 0;
- SetSeqinfoString(&(ainfo->sqinfo[currnum]), nptr, SQINFO_NAME);
- }
- else /* in each additional block: check names */
- {
- if (strcmp(ainfo->sqinfo[currnum].name, nptr) != 0)
- warn_names = TRUE;
- }
- currnum++;
-
- /* check rcol, lcol */
- if ((sptr = strtok(NULL, WHITESPACE)) != NULL)
- {
- /* is this the furthest left we've
- seen word 2 in this block? */
- if (sptr - bufcpy < blocks[blocknum].lcol)
- blocks[blocknum].lcol = sptr - bufcpy;
- /* look for right side in buffer */
- for (sptr = buffer + strlen(buffer) - 1;
- strchr(WHITESPACE, *sptr) != NULL;
- sptr --)
- /* do nothing */ ;
- if (sptr - buffer > blocks[blocknum].rcol)
- blocks[blocknum].rcol = sptr - buffer;
- }
-
- /* get the next line; blank line means end of block */
- do
- {
- if (fgets(buffer, LINEBUFLEN, fp) == NULL)
- { nptr = NULL; break; }
- strcpy(bufcpy, buffer);
-
- if (strncmp(buffer, "#=SS", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SS;
- else if (strncmp(buffer, "#=SA", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SA;
- else if (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1;
- else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1;
-
- if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL)
- break;
- } while (strchr(commentsyms, *nptr) != NULL);
- }
-
-
- /* check that number of sequences matches expected */
- if (blocknum == 0)
- num = currnum;
- else if (currnum != num)
- Die("Parse error in ReadSELEX()");
- blocknum++;
-
- /* get first line of next block
- * (non-comment, non-blank) */
- do
- {
- if (fgets(buffer, LINEBUFLEN, fp) == NULL) { nptr = NULL; break; }
- strcpy(bufcpy, buffer);
- }
- while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL ||
- (strchr(commentsyms, *nptr) != NULL));
- }
-
-
- /***************************************************
- * Get ready for second pass:
- * figure out the length of the alignment
- * malloc space
- * rewind the file
- ***************************************************/
-
- alen = 0;
- for (currblock = 0; currblock < blocknum; currblock++)
- alen += blocks[currblock].rcol - blocks[currblock].lcol + 1;
-
- rewind(fp);
-
- /* allocations. we can't use AllocateAlignment because of
- * the way we already used ainfo->sqinfo.
- */
- aseqs = (char **) MallocOrDie (num * sizeof(char *));
- if (have_cs)
- ainfo->cs = (char *) MallocOrDie ((alen+1) * sizeof(char));
- if (have_rf)
- ainfo->rf = (char *) MallocOrDie ((alen+1) * sizeof(char));
-
-
-
- for (i = 0; i < num; i++)
- {
- aseqs[i] = (char *) MallocOrDie ((alen+1) * sizeof(char));
- if (ainfo->sqinfo[i].flags & SQINFO_SS)
- ainfo->sqinfo[i].ss = (char *) MallocOrDie ((alen+1) * sizeof(char));
- if (ainfo->sqinfo[i].flags & SQINFO_SA)
- ainfo->sqinfo[i].sa = (char *) MallocOrDie ((alen+1) * sizeof(char));
- }
-
- ainfo->alen = alen;
- ainfo->nseq = num;
- ainfo->wgt = (float *) MallocOrDie (sizeof(float) * num);
- FSet(ainfo->wgt, num, 1.0);
-
- /***************************************************
- * Second pass across file. Parse header; assemble sequences
- ***************************************************/
- /* We've now made a complete first pass over the file. We know how
- * many blocks it contains, we know the number of seqs in the first
- * block, and we know every block has the same number of blocks;
- * so we can be a bit more cavalier about error-checking as we
- * make the second pass.
- */
-
- /* Look for header
- */
- headnum = 0;
- for (;;)
- {
- if (fgets(buffer, LINEBUFLEN, fp) == NULL)
- Die("Parse error in ReadSELEX()");
- strcpy(bufcpy, buffer);
- if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* skip blank lines */
-
- if (strcmp(nptr, "#=AU") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
- ainfo->au = Strdup(sptr);
- else if (strcmp(nptr, "#=ID") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
- ainfo->name = Strdup(sptr);
- else if (strcmp(nptr, "#=AC") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
- ainfo->acc = Strdup(sptr);
- else if (strcmp(nptr, "#=DE") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
- ainfo->desc = Strdup(sptr);
- else if (strcmp(nptr, "#=GA") == 0)
- {
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=GA line in ReadSELEX()");
- ainfo->ga1 = atof(sptr);
-
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=GA line in ReadSELEX()");
- ainfo->ga2 = atof(sptr);
-
- ainfo->flags |= AINFO_GA;
- }
- else if (strcmp(nptr, "#=TC") == 0)
- {
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=TC line in ReadSELEX()");
- ainfo->tc1 = atof(sptr);
-
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=TC line in ReadSELEX()");
- ainfo->tc2 = atof(sptr);
-
- ainfo->flags |= AINFO_TC;
- }
- else if (strcmp(nptr, "#=NC") == 0)
- {
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=NC line in ReadSELEX()");
- ainfo->nc1 = atof(sptr);
-
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=NC line in ReadSELEX()");
- ainfo->nc2 = atof(sptr);
-
- ainfo->flags |= AINFO_NC;
- }
- else if (strcmp(nptr, "#=SQ") == 0) /* per-sequence header info */
- {
- /* first field is the name */
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX()");
- if (strcmp(sptr, ainfo->sqinfo[headnum].name) != 0) warn_names = TRUE;
-
- /* second field is the weight */
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX()");
- if (!IsReal(sptr))
- Die("Parse error in #=SQ line in ReadSELEX(): weight is not a number");
- ainfo->wgt[headnum] = atof(sptr);
-
- /* third field is database source id */
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ID);
-
- /* fourth field is database accession number */
- if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ACC);
-
- /* fifth field is start..stop::olen */
- if ((sptr = strtok(NULL, ".:")) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_START);
-
- if ((sptr = strtok(NULL, ".:")) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_STOP);
-
- if ((sptr = strtok(NULL, ":\t ")) == NULL)
- Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_OLEN);
-
- /* rest of line is optional description */
- if ((sptr = strtok(NULL, "\n")) != NULL)
- SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_DESC);
-
- headnum++;
- }
- else if (strcmp(nptr, "#=CS") == 0) break;
- else if (strcmp(nptr, "#=RF") == 0) break;
- else if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment, non-header */
- }
-
-
- currlen = 0;
- for (currblock = 0 ; currblock < blocknum; currblock++)
- {
- /* parse the block */
- seqidx = 0;
- while (nptr != NULL)
- {
- /* Consensus structure */
- if (strcmp(nptr, "#=CS") == 0)
- {
- if (! copy_alignment_line(ainfo->cs, currlen, strlen(nptr)-1,
- buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
- Die("Parse error in #=CS line in ReadSELEX()");
- }
-
- /* Reference coordinates */
- else if (strcmp(nptr, "#=RF") == 0)
- {
- if (! copy_alignment_line(ainfo->rf, currlen, strlen(nptr)-1,
- buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
- Die("Parse error in #=RF line in ReadSELEX()");
- }
- /* Individual secondary structure */
- else if (strcmp(nptr, "#=SS") == 0)
- {
- if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].ss, currlen, strlen(nptr)-1,
- buffer, blocks[currblock].lcol,
- blocks[currblock].rcol, (char) '.'))
- Die("Parse error in #=SS line in ReadSELEX()");
- }
-
- /* Side chain % surface accessibility code */
- else if (strcmp(nptr, "#=SA") == 0)
- {
- if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].sa, currlen, strlen(nptr)-1,
- buffer, blocks[currblock].lcol,
- blocks[currblock].rcol, (char) '.'))
- Die("Parse error in #=SA line in ReadSELEX()");
- }
- /* Aligned sequence; avoid unparsed machine comments */
- else if (strncmp(nptr, "#=", 2) != 0)
- {
- if (! copy_alignment_line(aseqs[seqidx], currlen, strlen(nptr)-1,
- buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
- Die("Parse error in alignment line in ReadSELEX()");
- seqidx++;
- }
-
- /* get next line */
- for (;;)
- {
- nptr = NULL;
- if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* EOF */
- strcpy(bufcpy, buffer);
- if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) break; /* blank */
- if (strncmp(buffer, "#=", 2) == 0) break; /* machine comment */
- if (strchr(commentsyms, *nptr) == NULL) break; /* data */
- }
- } /* end of a block */
-
- currlen += blocks[currblock].rcol - blocks[currblock].lcol + 1;
-
- /* get line 1 of next block */
- for (;;)
- {
- if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* no data */
- strcpy(bufcpy, buffer);
- if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* blank */
- if (strncmp(buffer, "#=", 2) == 0) break; /* machine comment */
- if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment */
- }
- } /* end of the file */
-
- /* Lengths in sqinfo are for raw sequence (ungapped),
- * and SS, SA are 0..rlen-1 not 0..alen-1.
- * Only the seqs with structures come out of here with lengths set.
- */
- for (seqidx = 0; seqidx < num; seqidx++)
- {
- int apos, rpos;
- /* secondary structures */
- if (ainfo->sqinfo[seqidx].flags & SQINFO_SS)
- {
- for (apos = rpos = 0; apos < alen; apos++)
- if (! isgap(aseqs[seqidx][apos]))
- {
- ainfo->sqinfo[seqidx].ss[rpos] = ainfo->sqinfo[seqidx].ss[apos];
- rpos++;
- }
- ainfo->sqinfo[seqidx].ss[rpos] = '\0';
- }
- /* Surface accessibility */
- if (ainfo->sqinfo[seqidx].flags & SQINFO_SA)
- {
- for (apos = rpos = 0; apos < alen; apos++)
- if (! isgap(aseqs[seqidx][apos]))
- {
- ainfo->sqinfo[seqidx].sa[rpos] = ainfo->sqinfo[seqidx].sa[apos];
- rpos++;
- }
- ainfo->sqinfo[seqidx].sa[rpos] = '\0';
- }
- }
-
- /* NULL-terminate all the strings */
- if (ainfo->rf != NULL) ainfo->rf[alen] = '\0';
- if (ainfo->cs != NULL) ainfo->cs[alen] = '\0';
- for (seqidx = 0; seqidx < num; seqidx++)
- aseqs[seqidx][alen] = '\0';
-
- /* find raw sequence lengths for sqinfo */
- for (seqidx = 0; seqidx < num; seqidx++)
- {
- count = 0;
- for (sptr = aseqs[seqidx]; *sptr != '\0'; sptr++)
- if (!isgap(*sptr)) count++;
- ainfo->sqinfo[seqidx].len = count;
- ainfo->sqinfo[seqidx].flags |= SQINFO_LEN;
- }
-
-
- /***************************************************
- * Garbage collection and return
- ***************************************************/
- free(blocks);
- if (warn_names)
- Warn("sequences may be in different orders in blocks of %s?", afp->fname);
-
- /* Convert back to MSA structure. (Wasteful kludge.)
- */
- msa = MSAFromAINFO(aseqs, ainfo);
- MSAVerifyParse(msa);
- FreeAlignment(aseqs, ainfo);
- return msa;
-}
-
-
-/* Function: WriteSELEX()
- * Date: SRE, Mon Jun 14 13:13:14 1999 [St. Louis]
- *
- * Purpose: Write a SELEX file in multiblock format.
- *
- * Args: fp - file that's open for writing
- * msa - multiple sequence alignment object
- *
- * Returns: (void)
- */
-void
-WriteSELEX(FILE *fp, MSA *msa)
-{
- actually_write_selex(fp, msa, 50); /* 50 char per block */
-}
-
-/* Function: WriteSELEXOneBlock()
- * Date:     SRE, Mon Jun 14 13:14:56 1999 [St. Louis]
- *
- * Purpose:  Write a SELEX alignment file in Pfam's single-block
- *           style: the block width handed to actually_write_selex()
- *           is the full alignment length, so every sequence ends up
- *           on one line.
- *
- * Args:     fp  - file that's open for writing
- *           msa - alignment to write
- *
- * Returns:  (void)
- */
-void
-WriteSELEXOneBlock(FILE *fp, MSA *msa)
-{
-  int whole_alignment;
-
-  whole_alignment = msa->alen;	/* one block spans all columns */
-  actually_write_selex(fp, msa, whole_alignment);
-}
-
-
-/* Function: selex_write_row()
- *
- * Purpose:  Write one line of an alignment block: <label> padded or
- *           truncated to <namewidth> columns, one space, then at most
- *           <cpl> characters of <text> starting at offset <currpos>.
- *           The "%.*s" precision stops at <cpl> characters or at the
- *           terminating NUL, whichever comes first, so no scratch
- *           buffer is needed.
- */
-static void
-selex_write_row(FILE *fp, int namewidth, char *label, char *text,
-		int currpos, int cpl)
-{
-  fprintf(fp, "%-*.*s %.*s\n", namewidth, namewidth, label, cpl, text + currpos);
-}
-
-/* Function: actually_write_selex()
- * Date:     SRE, Mon Jun 14 12:54:46 1999 [St. Louis]
- *
- * Purpose:  Write an alignment in SELEX format to an open
- *           file. This is the function that actually does
- *           the work. The API's WriteSELEX() and
- *           WriteSELEXOneBlock() are wrappers.
- *
- *           Output order: free-text comments, per-file annotation
- *           (#=ID/AC/DE/AU), score cutoffs (#=GA/NC/TC), one #=SQ
- *           line per sequence, then the alignment in blocks of
- *           <cpl> columns separated by blank lines.
- *
- * Args:     fp  - file that's open for writing
- *           msa - alignment to write
- *           cpl - characters to write per line in alignment block
- *
- * Returns:  (void)
- */
-static void
-actually_write_selex(FILE *fp, MSA *msa, int cpl)
-{
-  int i;
-  int len = 0;
-  int namewidth;
-  int currpos;
-
-  /* Figure out how much space we need for name + markup
-   * to keep the alignment in register, for easier human viewing.
-   */
-  namewidth = 0;
-  for (i = 0; i < msa->nseq; i++)
-    if ((len = (int) strlen(msa->sqname[i])) > namewidth)
-      namewidth = len;
-  if (namewidth < 6) namewidth = 6; /* minimum space for markup tags */
-
-  /* Free text comments
-   */
-  for (i = 0; i < msa->ncomment; i++)
-    fprintf(fp, "# %s\n", msa->comment[i]);
-  if (msa->ncomment > 0) fprintf(fp, "\n");
-
-  /* Per-file annotation
-   */
-  if (msa->name != NULL) fprintf(fp, "#=ID %s\n", msa->name);
-  if (msa->acc != NULL) fprintf(fp, "#=AC %s\n", msa->acc);
-  if (msa->desc != NULL) fprintf(fp, "#=DE %s\n", msa->desc);
-  if (msa->au != NULL) fprintf(fp, "#=AU %s\n", msa->au);
-
-  /* Thresholds are hacky. Pfam has two. Rfam has one.
-   */
-  if (msa->cutoff_is_set[MSA_CUTOFF_GA1] && msa->cutoff_is_set[MSA_CUTOFF_GA2])
-    fprintf(fp, "#=GA %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_GA1], msa->cutoff[MSA_CUTOFF_GA2]);
-  else if (msa->cutoff_is_set[MSA_CUTOFF_GA1])
-    fprintf(fp, "#=GA %.1f\n", msa->cutoff[MSA_CUTOFF_GA1]);
-  if (msa->cutoff_is_set[MSA_CUTOFF_NC1] && msa->cutoff_is_set[MSA_CUTOFF_NC2])
-    fprintf(fp, "#=NC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_NC1], msa->cutoff[MSA_CUTOFF_NC2]);
-  else if (msa->cutoff_is_set[MSA_CUTOFF_NC1])
-    fprintf(fp, "#=NC %.1f\n", msa->cutoff[MSA_CUTOFF_NC1]);
-  if (msa->cutoff_is_set[MSA_CUTOFF_TC1] && msa->cutoff_is_set[MSA_CUTOFF_TC2])
-    fprintf(fp, "#=TC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_TC1], msa->cutoff[MSA_CUTOFF_TC2]);
-  else if (msa->cutoff_is_set[MSA_CUTOFF_TC1])
-    fprintf(fp, "#=TC %.1f\n", msa->cutoff[MSA_CUTOFF_TC1]);
-
-  /* Per-sequence annotation
-   */
-  for (i = 0; i < msa->nseq; i++)
-    fprintf(fp, "#=SQ %-*.*s %6.4f %s %s %d..%d::%d %s\n",
-	    namewidth, namewidth, msa->sqname[i],
-	    msa->wgt[i],
-	    "-", /* MSA has no ID field */
-	    (msa->sqacc != NULL && msa->sqacc[i] != NULL) ? msa->sqacc[i] : "-",
-	    0, 0, 0, /* MSA has no start, stop, olen field */
-	    (msa->sqdesc != NULL && msa->sqdesc[i] != NULL) ? msa->sqdesc[i] : "-");
-  fprintf(fp, "\n");
-
-  /* Alignment section: consensus markup first, then each sequence
-   * immediately followed by its own #=SS / #=SA lines, if any.
-   */
-  for (currpos = 0; currpos < msa->alen; currpos += cpl)
-    {
-      if (currpos > 0) fprintf(fp, "\n");
-
-      if (msa->ss_cons != NULL)
-	selex_write_row(fp, namewidth, "#=CS", msa->ss_cons, currpos, cpl);
-      if (msa->rf != NULL)
-	selex_write_row(fp, namewidth, "#=RF", msa->rf, currpos, cpl);
-
-      for (i = 0; i < msa->nseq; i++)
-	{
-	  selex_write_row(fp, namewidth, msa->sqname[i], msa->aseq[i], currpos, cpl);
-
-	  if (msa->ss != NULL && msa->ss[i] != NULL)
-	    selex_write_row(fp, namewidth, "#=SS", msa->ss[i], currpos, cpl);
-	  if (msa->sa != NULL && msa->sa[i] != NULL)
-	    selex_write_row(fp, namewidth, "#=SA", msa->sa[i], currpos, cpl);
-	}
-    }
-}
-
-
-/* Function: copy_alignment_line()
- *
- * Purpose:  Copy columns lcol..rcol (inclusive) of one alignment-file
- *           line in <buffer> into <aseq>, starting at aseq[apos].
- *           Exactly rcol-lcol+1 characters are written; no NUL is
- *           appended.
- *
- *           A column is stored as <gapsym> when
- *             - it lies at or left of name_rcol (the sequence name
- *               intrudes into the sequence zone),
- *             - the buffer has already ended (NUL or newline), or
- *             - the buffer holds a space there.
- *           Anything else is copied through unchanged.
- *
- * Returns:  1 on success; 0, after a warning, as soon as a TAB is
- *           seen in the column range.
- */
-static int
-copy_alignment_line(char *aseq, int apos, int name_rcol,
-		    char *buffer, int lcol, int rcol, char gapsym)
-{
-  char *dst = aseq + apos;
-  char *src = buffer;
-  int   col;
-
-  /* advance to column lcol, but never step past the end of the line */
-  for (col = 0; col < lcol; col++)
-    if (*src != '\0') src++;
-
-  for (col = lcol; col <= rcol; col++, dst++)
-    {
-      if (*src == '\t') {
-	Warn("TAB characters will corrupt a SELEX alignment! Please remove them first.");
-	return 0;
-      }
-
-      if (col <= name_rcol || *src == '\0' || *src == '\n' || *src == ' ')
-	*dst = gapsym;		/* name intrusion, short line, or blank */
-      else
-	*dst = *src;		/* ordinary residue or gap character */
-
-      if (*src != '\0') src++;	/* stay parked on the terminator */
-    }
-  return 1;
-}
-
-
-
-
-
-/* Function: DealignAseqs()
- *
- * Purpose:  Build gap-free copies of <num> aligned sequences.
- *           For each aseqs[i] a new string of strlen(aseqs[i])+1
- *           bytes is allocated and filled with every character for
- *           which isgap() is false, in order, then NUL-terminated.
- *
- * Args:     aseqs     - aligned sequences, [0..num-1], NUL-terminated
- *           num       - number of sequences
- *           ret_rseqs - RETURN: newly allocated array of raw sequences
- *
- *           Caller is responsible for free'ing the array and each
- *           string in it.
- *
- * Returns:  1. (All allocation goes through MallocOrDie(); the code
- *           here has no path that returns 0.)
- */
-int
-DealignAseqs(char **aseqs, int num, char ***ret_rseqs)
-{
-  char **raw;			/* de-aligned sequence array       */
-  char  *src;			/* read cursor in aligned sequence */
-  char  *dst;			/* write cursor in raw sequence    */
-  int    n;			/* counter for sequences           */
-
-  raw = (char **) MallocOrDie (num * sizeof(char *));
-
-  for (n = 0; n < num; n++)
-    {
-      /* the aligned length is an upper bound on the raw length */
-      raw[n] = (char *) MallocOrDie ((strlen(aseqs[n]) + 1) * sizeof(char));
-
-      dst = raw[n];
-      for (src = aseqs[n]; *src != '\0'; src++)
-	if (!isgap(*src))
-	  *dst++ = *src;
-      *dst = '\0';
-    }
-
-  *ret_rseqs = raw;
-  return 1;
-}
-
-
-/* Function: IsSELEXFormat()
- *
- * Purpose:  Quick guess at whether <filename> may be a SELEX file.
- *           At most the first 500 lines are examined:
- *             - a line starting with one of the extended-SELEX
- *               "#=XX" markup tags settles it immediately (TRUE);
- *             - comment lines, blank lines and one-word lines are
- *               passed over;
- *             - otherwise everything after the first word is handed
- *               to Seqtype(); a kOtherSeq verdict means FALSE.
- *           Running out of lines without a FALSE verdict means TRUE.
- *
- *           Accuracy is sacrificed for speed; a TRUE return does
- *           *not* guarantee that the file will pass the stricter
- *           error-checking of ReadSELEX().
- *
- * Returns:  TRUE (1) or FALSE (0). If the file can't be opened,
- *           sets squid_errno to SQERR_NOFILE and returns 0.
- */
-int
-IsSELEXFormat(char *filename)
-{
-  /* dead giveaways for extended SELEX */
-  static char *markup_tags[] = {
-    "#=AU", "#=ID", "#=AC", "#=DE", "#=GA", "#=TC",
-    "#=NC", "#=SQ", "#=SS", "#=CS", "#=RF"
-  };
-  int   ntags = sizeof(markup_tags) / sizeof(markup_tags[0]);
-  FILE *fp;			/* ptr to open sequence file */
-  char  line[LINEBUFLEN];
-  char *rest;			/* what follows the first word */
-  int   nread;			/* lines consumed so far */
-  int   t;
-
-  fp = fopen(filename, "r");
-  if (fp == NULL)
-    {
-      squid_errno = SQERR_NOFILE;
-      return 0;
-    }
-
-  for (nread = 0; nread < 500 && fgets(line, LINEBUFLEN, fp) != NULL; )
-    {
-      nread++;
-
-      for (t = 0; t < ntags; t++)
-	if (strncmp(line, markup_tags[t], 4) == 0)
-	  {
-	    fclose(fp);
-	    return 1;
-	  }
-
-      /* a comment? */
-      if (strchr(commentsyms, *line) != NULL) continue;
-
-      /* a blank line? */
-      if (strtok(line, WHITESPACE) == NULL) continue;
-
-      /* a one-word line (name only) is possible, though rare */
-      rest = strtok(NULL, "\n");
-      if (rest == NULL) continue;
-
-      if (Seqtype(rest) == kOtherSeq)
-	{
-	  fclose(fp);
-	  return 0;
-	}
-    }
-
-  fclose(fp);
-  return 1;
-}
-
-
-
-
-
-
-
-
diff --git a/squid/seqencode.c b/squid/seqencode.c
deleted file mode 100644
index 03f3322..0000000
--- a/squid/seqencode.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* seqencode.c
- *
- * Routines for creating and manipulating encoded sequence strings.
- * CVS $Id: seqencode.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-
-/* seqcmp()
- *
- * Count mismatches between two NTEND-terminated encoded strings.
- * Positions are compared pairwise with ntmatch() while both strings
- * last; each symbol then left over in s1 counts as one further
- * mismatch (leftover s2 is not counted). Counting stops early once
- * the tally exceeds <allow>.
- *
- * Returns 0 when no mismatch was counted, else the mismatch tally.
- */
-int
-seqcmp(char *s1, char *s2, int allow)
-{
-  int nmis;
-
-  /* pairwise comparison over the common length */
-  for (nmis = 0; *s1 != NTEND && *s2 != NTEND && nmis <= allow; s1++, s2++)
-    if (!(ntmatch(*s1, *s2)))
-      nmis++;
-
-  /* whatever remains of s1 is unmatched */
-  for (; *s1 != NTEND && nmis <= allow; s1++)
-    nmis++;
-
-  return nmis;
-}
-/* seqncmp()
- *
- * Same as seqcmp(), but looks at, at most, n positions.
- *
- * Positions are compared pairwise with ntmatch() until either string
- * reaches NTEND or n positions have been used; the function returns
- * at once when the mismatch tally exceeds <allow>. If positions
- * remain in the budget, each symbol left over in s1 counts as one
- * more mismatch.
- *
- * The position budget is decremented only while it is positive and
- * is never driven below zero. (Previously the post-decrement in the
- * loop test left n at -1 once the budget was spent, which made the
- * trailing loop count the rest of s1 as mismatches.) A negative n
- * still means "no limit", as before. s1 is now also checked for
- * NTEND in the pairwise loop so it is never read past its end.
- *
- * Returns the mismatch tally (0 if none).
- */
-int
-seqncmp(char *s1, char *s2, int n, int allow)
-{
-  int mmat = 0;
-
-  while (n != 0 && *s1 != NTEND && *s2 != NTEND)
-    {
-      if ((!(ntmatch(*s1, *s2))) &&
-	  (++mmat > allow))
-	return(mmat);
-      s1++;
-      s2++;
-      if (n > 0) n--;
-    }
-
-  while (n != 0 && *s1 != NTEND && mmat <= allow)
-    {
-      mmat++;
-      s1++;
-      if (n > 0) n--;
-    }
-  return (mmat);
-}
-
-/* seqencode()
- *
- * Given a character text string str (A,C,G,T and the other symbols
- * listed in the iupac[] table), convert it to an encoded seq string
- * in codeseq, terminated by NTEND.
- *
- * Lower-case letters in str are upper-cased IN PLACE before lookup,
- * so the caller's string is modified.
- *
- * The table scan tests the index bound before touching iupac[idx],
- * and treats idx == IUPACSYMNUM as "not found". This assumes
- * IUPACSYMNUM is the number of entries in iupac[], which is how the
- * other routines in this file use it -- TODO confirm against the
- * table definition.
- *
- * Returns 1 for success; 0 if a symbol is not in the table, in which
- * case codeseq is terminated at the offending position.
- */
-int
-seqencode(char *codeseq, /* pre-allocated space for answer */
-	  char *str) /* character string to convert */
-{
-  char *ptr;
-  int idx;
-
-  ptr = codeseq;
-  while (*str != '\0')
-    {
-      /* ctype functions require an unsigned char value */
-      if (islower((unsigned char) *str))
-	*str = (char) toupper((unsigned char) *str);
-
-      for (idx = 0; idx < IUPACSYMNUM && *str != iupac[idx].sym; idx++)
-	;
-      if (idx >= IUPACSYMNUM)
-	{
-	  *ptr = (char) NTEND;
-	  return 0;
-	}
-      *ptr = iupac[idx].code;
-      ptr++;
-      str++;
-    }
-  *ptr = NTEND;
-  return 1;
-}
-
-
-/* coded_revcomp()
- *
- * Write the reverse complement of the encoded sequence <seq> into
- * <comp> (pre-allocated by the caller), terminated by NTEND. <seq>
- * is walked from its last symbol to its first and each code is
- * replaced by the .comp member of its iupac[] entry.
- *
- * The length of <seq> is taken with strlen(), as before.
- * NOTE(review): that presumes the NTEND terminator is a zero byte --
- * confirm against its definition.
- *
- * The failure test is now idx >= IUPACSYMNUM. The scan stops at
- * idx == IUPACSYMNUM, so the old "idx > IUPACSYMNUM" test could never
- * fire and an unknown code fell through to read iupac[IUPACSYMNUM].
- * This assumes IUPACSYMNUM is the entry count of iupac[] -- TODO confirm.
- *
- * Returns 1 on success; 0 if a code is not in the table, in which
- * case comp is terminated at the offending position.
- */
-int
-coded_revcomp(char *comp, char *seq)
-{
-  long bases;
-  char *bckp, *fwdp;
-  int idx;
-  long pos;
-
-  bases = strlen(seq);
-
-  fwdp = comp;
-  bckp = seq + bases -1;
-  for (pos = 0; pos < bases; pos++)
-    {
-      for (idx = 0; idx < IUPACSYMNUM && *bckp != iupac[idx].code; idx++)
-	;
-      if (idx >= IUPACSYMNUM)
-	{
-	  *fwdp = NTEND;
-	  return 0;
-	}
-      *fwdp = iupac[idx].comp;
-      fwdp++;
-      bckp--;
-    }
-  *fwdp = NTEND;
-  return(1);
-}
-
-/* seqdecode()
- *
- * Convert the NTEND-terminated encoded sequence <codeseq> back to
- * text in <str> (pre-allocated by the caller; it needs one byte per
- * code plus one for the terminating NUL).
- *
- * The failure test is now idx >= IUPACSYMNUM. The scan stops at
- * idx == IUPACSYMNUM, so the old "idx > IUPACSYMNUM" test could never
- * fire and an unknown code fell through to read iupac[IUPACSYMNUM].
- * This assumes IUPACSYMNUM is the entry count of iupac[] -- TODO confirm.
- *
- * Returns 1 on success. On an unknown code, stores 'X' at that
- * position, NUL-terminates the partial result (the old code left it
- * unterminated) and returns 0.
- */
-int
-seqdecode(char *str, char *codeseq)
-{
-  int idx;
-  int pos;
-
-  pos = 0;
-  while (*codeseq != NTEND)
-    {
-      for (idx = 0; idx < IUPACSYMNUM && *codeseq != iupac[idx].code; idx++)
-	;
-      if (idx >= IUPACSYMNUM)
-	{
-	  str[pos] = 'X';
-	  str[pos+1] = '\0';
-	  return 0;
-	}
-      str[pos] = iupac[idx].sym;
-      codeseq++;
-      pos++;
-    }
-  str[pos] = '\0';
-  return 1;
-}
-
-/* seqndecode()
- *
- * Decode exactly <n> codes of <codeseq> to text in <str>. The loop
- * is driven by <n> alone; it does not look for NTEND. <str> needs
- * room for n characters plus the terminating NUL.
- *
- * The failure test is now idx >= IUPACSYMNUM. The scan stops at
- * idx == IUPACSYMNUM, so the old "idx > IUPACSYMNUM" test could never
- * fire and an unknown code fell through to read iupac[IUPACSYMNUM].
- * This assumes IUPACSYMNUM is the entry count of iupac[] -- TODO confirm.
- *
- * Returns 1 on success. On an unknown code, stores 'X' at that
- * position, NUL-terminates the partial result (the old code left it
- * unterminated) and returns 0.
- */
-int
-seqndecode(
-	   char *str, /* pre-allocated string to write into */
-	   char *codeseq, /* sequence to decode */
-	   int n) /* how many bases to decode */
-{
-  int idx;
-  int pos = 0;
-
-  while (--n >= 0)
-    {
-      for (idx = 0; idx < IUPACSYMNUM && *codeseq != iupac[idx].code; idx++)
-	;
-      if (idx >= IUPACSYMNUM)
-	{
-	  str[pos] = 'X';
-	  str[pos+1] = '\0';
-	  return 0;
-	}
-      str[pos] = iupac[idx].sym;
-      codeseq++;
-      pos++;
-    }
-  str[pos] = '\0';
-  return 1;
-}
-
diff --git a/squid/seqsplit_main.c b/squid/seqsplit_main.c
deleted file mode 100644
index 46098fe..0000000
--- a/squid/seqsplit_main.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-
-/* seqsplit_main.c
- * SRE, Mon Sep 25 11:43:58 2000
- *
- * Split sequences into smaller chunks of defined size and overlap;
- * output a FASTA file.
- *
- * Limitations:
- * still working in 32 bits -- no sequence can be more than 2 GB
- * in size.
- * CVS $Id: seqsplit_main.c,v 1.7 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "seqsplit - split seqs into chunks of defined size and overlap"; /* handed to SqdBanner() for -h */
-
-static char usage[] = "\
-Usage: seqsplit [-options] <seqfile>\n\
- Available options:\n\
- -h : help; display usage and version\n\
- -o <file> : output the new FASTA file to <file>\n\
-"; /* passed to Getopt(); printed for -h and when the argument count is wrong */
-
-static char experts[] = "\
- --fragfile <f> : save one-line-per-frag coord summary file to <f>\n\
- --informat <s> : specify sequence file format <s>\n\
- --length <n> : set max length of each unique seq frag to <n>\n\
- --overlap <n> : set overlap length to <n> (total frag size = length+overlap)\n\
- --shortnames : use short \"frag1\" names, not \"<src>/<from>-<to>\"\n\
-"; /* long-option help; printed after usage for -h only */
-
-static struct opt_s OPTIONS[] = {
- { "-h", TRUE, sqdARG_NONE },
- { "-o", TRUE, sqdARG_STRING },
- { "--fragfile", FALSE, sqdARG_STRING },
- { "--informat", FALSE, sqdARG_STRING },
- { "--length", FALSE, sqdARG_INT },
- { "--overlap", FALSE, sqdARG_INT },
- { "--shortnames", FALSE, sqdARG_NONE },
-}; /* option name, TRUE for the one-letter options / FALSE for the long ones, argument type */
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) /* number of entries in OPTIONS[] */
-
-static char *set_description(char *source, int start, int end, char *origdesc); /* builds "[source:start..end] origdesc" */
-static char *set_name(char *origname, int start, int end, int do_shortnames, int fragnum); /* builds "frag<N>" or "<name>/frag<N>/<start>-<end>" */
-
-/* main() for seqsplit
- *
- * Reads every sequence of <seqfile> and writes FASTA records to the
- * output file (stdout by default):
- *   - a sequence no longer than fraglength+overlap is written whole;
- *   - a longer one is cut into fragments that start every
- *     <fraglength> residues and are up to fraglength+overlap long.
- *     A trailing piece of <overlap> residues or fewer is dropped,
- *     since the previous fragment already contains it.
- * With --fragfile, one tab-separated line per record (new name,
- * source name, 1-based start, end) is also written. A three-line
- * summary goes to stdout at the end.
- *
- * Fixes relative to the previous version:
- *   - the fragment buffer is one byte larger, so the terminating NUL
- *     at index fraglength+overlap lies inside the allocation;
- *   - sequences written whole are now released with FreeSequence()
- *     (the early "continue" used to skip it);
- *   - --length must be at least 1 and --overlap non-negative; a
- *     length of 0 made the fragment loop never advance.
- *
- * Returns 0; fatal problems are reported through Die().
- */
-int
-main(int argc, char **argv)
-{
-  char *seqfile; /* name of sequence file */
-  char *outfile; /* name of output file */
-  SQFILE *dbfp; /* open sequence file */
-  FILE *ofp; /* open output file */
-  int fmt; /* format of seqfile */
-  char *seq; /* sequence */
-  SQINFO sqinfo; /* extra info about sequence */
-  char *seqfrag; /* space for a seq fragment */
-  int fraglength; /* length of unique seq per frag */
-  int overlap; /* length of overlap. frags are fraglength+overlap*/
-  char *sqname; /* renamed fragment, w/ coord info */
-  char *desc; /* new desc line */
-  int num; /* number of this fragment */
-  int pos; /* position in a sequence */
-  int len; /* length of a fragment */
-
-  int nseqs; /* total number of input sequences */
-  int nsplit; /* number of seqs that get split */
-  int nnewfrags; /* total number of new fragments */
-  int ntot; /* total number of seqs in new file */
-  int do_shortnames; /* TRUE to do short code names */
-  char *fragfile; /* fragment summary out file, or NULL */
-  FILE *fragfp;
-
-  char *optname;
-  char *optarg;
-  int optind;
-
-  /***********************************************
-   * Parse command line
-   ***********************************************/
-
-  fmt = SQFILE_UNKNOWN; /* default: autodetect */
-  fraglength = 100000;
-  overlap = 1000;
-  outfile = NULL;
-  do_shortnames = FALSE;
-  fragfile = NULL;
-  fragfp = NULL;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-		&optind, &optname, &optarg))
-    {
-      if (strcmp(optname, "-o") == 0) outfile = optarg;
-      else if (strcmp(optname, "--fragfile") == 0) fragfile = optarg;
-      else if (strcmp(optname, "--length") == 0) fraglength = atoi(optarg);
-      else if (strcmp(optname, "--overlap") == 0) overlap = atoi(optarg);
-      else if (strcmp(optname, "--shortnames") == 0) do_shortnames = TRUE;
-      else if (strcmp(optname, "--informat") == 0) {
-	fmt = String2SeqfileFormat(optarg);
-	if (fmt == SQFILE_UNKNOWN)
-	  Die("unrecognized sequence file format \"%s\"", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0) {
-	SqdBanner(stdout, banner);
-	puts(usage);
-	puts(experts);
-	exit(EXIT_SUCCESS);
-      }
-    }
-
-  if (argc - optind != 1) Die("%s\n", usage);
-  seqfile = argv[argc-1];
-
-  /* The fragment loop steps by fraglength, so it must be positive. */
-  if (fraglength < 1) Die("--length must be at least 1 (got %d)", fraglength);
-  if (overlap < 0) Die("--overlap must not be negative (got %d)", overlap);
-
-  /* +1: room for the NUL that bounds the strncpy()'d fragment */
-  seqfrag = MallocOrDie(sizeof(char) * (fraglength+overlap+1));
-  seqfrag[fraglength+overlap] = '\0';
-
-  /* Try to work around inability to autodetect from a pipe or .gz:
-   * assume FASTA format
-   */
-  if (fmt == SQFILE_UNKNOWN &&
-      (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
-    fmt = SQFILE_FASTA;
-
-  /***********************************************
-   * Read the file.
-   ***********************************************/
-
-  if (outfile == NULL) ofp = stdout;
-  else {
-    if ((ofp = fopen(outfile, "w")) == NULL)
-      Die("Failed to open output sequence file %s for writing", outfile);
-  }
-
-  if (fragfile != NULL) {
-    if ((fragfp = fopen(fragfile, "w")) == NULL)
-      Die("Failed to open frag summary file %s for writing", fragfile);
-  }
-
-  if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
-    Die("Failed to open sequence file %s for reading", seqfile);
-
-  nseqs = nsplit = nnewfrags = ntot = 0;
-  while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
-    {
-      nseqs++;
-
-      /* Short enough to pass through in one piece. */
-      if (sqinfo.len <= fraglength+overlap) {
-	ntot++;
-	if (do_shortnames) {
-	  sqname = set_name(sqinfo.name, 1, sqinfo.len, do_shortnames, ntot);
-	  desc = set_description(sqinfo.name, 1, sqinfo.len,
-				 sqinfo.flags & SQINFO_DESC ? sqinfo.desc : NULL);
-	} else {
-	  sqname = sre_strdup(sqinfo.name, -1);
-	  if (sqinfo.flags & SQINFO_DESC) desc = sre_strdup(sqinfo.desc, -1);
-	  else desc = NULL;
-	}
-
-	WriteSimpleFASTA(ofp, seq, sqname, desc);
-
-	if (fragfp != NULL)
-	  fprintf(fragfp, "%s\t%s\t%d\t%d\n", sqname, sqinfo.name, 1, sqinfo.len);
-	if (desc != NULL) free(desc);
-	free(sqname);
-	FreeSequence(seq, &sqinfo);	/* was leaked by the early continue */
-	continue;
-      }
-
-      num = 1;
-      nsplit++;
-      for (pos = 0; pos < sqinfo.len; pos += fraglength)
-	{
-	  /* tail already covered by the previous fragment's overlap */
-	  if (sqinfo.len - pos <= overlap) continue;
-
-	  ntot++;
-	  strncpy(seqfrag, seq+pos, fraglength+overlap);
-	  len = strlen(seqfrag);
-
-	  if (do_shortnames) {
-	    sqname = set_name(sqinfo.name, pos+1, pos+len, do_shortnames, ntot);
-	    desc = set_description(sqinfo.name, pos+1, pos+len,
-				   sqinfo.flags & SQINFO_DESC ? sqinfo.desc : NULL);
-	  } else {
-	    sqname = set_name(sqinfo.name, pos+1, pos+len, do_shortnames, num);
-	    if (sqinfo.flags & SQINFO_DESC) desc = sre_strdup(sqinfo.desc, -1);
-	    else desc = NULL;
-	  }
-
-	  WriteSimpleFASTA(ofp, seqfrag, sqname, desc);
-
-	  if (fragfp != NULL)
-	    fprintf(fragfp, "%s\t%s\t%d\t%d\n", sqname, sqinfo.name, pos+1,
-		    pos+len);
-
-	  if (desc != NULL) free(desc);
-	  free(sqname);
-	  nnewfrags++;
-	  num ++;
-	}
-      FreeSequence(seq, &sqinfo);
-    }
-  SeqfileClose(dbfp);
-  if (outfile != NULL) fclose(ofp);
-  if (fragfile != NULL) fclose(fragfp);
-  free(seqfrag);
-
-  printf("Total # of seqs: %d\n", nseqs);
-  printf("Affected by splitting: %d\n", nsplit);
-  printf("New # of seqs: %d\n", nseqs-nsplit + nnewfrags);
-
-  return 0;
-}
-
-
-/* set_description()
- *
- * Build a newly allocated description line for a fragment.
- *   source != NULL : "[source:start..end] origdesc"
- *                    (origdesc replaced by "" when it is NULL)
- *   source == NULL : a copy of origdesc, or NULL if that is NULL too
- * The caller frees the result.
- */
-static char *
-set_description(char *source, int start, int end, char *origdesc)
-{
-  int   nbytes;
-  char *result;
-
-  if (source == NULL)
-    return (origdesc != NULL) ? sre_strdup(origdesc, -1) : NULL;
-
-  nbytes  = 7 + strlen(source);	/* 7 is for [:..] \0 */
-  nbytes += start > 0 ? ceil(log10(start+1)) : 1; /* itoa length */
-  nbytes += end > 0 ? ceil(log10(end+1)) : 1;
-  if (origdesc != NULL) nbytes += strlen(origdesc);
-
-  result = MallocOrDie(sizeof(char) * nbytes);
-  sprintf(result, "[%s:%d..%d] %s", source, start, end,
-	  origdesc == NULL ? "" : origdesc);
-  return result;
-}
-
-/* set_name()
- *
- * Build a newly allocated name for a fragment.
- *   do_shortnames : "frag<fragnum>"
- *   otherwise     : "<origname>/frag<fragnum>/<start>-<end>"
- * Buffer sizes are derived from the decimal width of each number.
- * The caller frees the result.
- */
-static char *
-set_name(char *origname, int start, int end, int do_shortnames, int fragnum)
-{
-  int   nbytes;
-  char *result;
-
-  if (!do_shortnames)
-    {
-      nbytes  = strlen(origname) + 8;
-      nbytes += fragnum > 0 ? ceil(log10(fragnum+1)) : 1;
-      nbytes += start > 0 ? ceil(log10(start+1)) : 1; /* itoa length */
-      nbytes += end > 0 ? ceil(log10(end+1)) : 1;
-      result = MallocOrDie(sizeof(char) * nbytes);
-      sprintf(result, "%s/frag%d/%d-%d", origname, fragnum, start, end);
-      return result;
-    }
-
-  nbytes  = 5;			/* frag \0 */
-  nbytes += fragnum > 0 ? ceil(log10(fragnum+1)) : 1;
-  result = MallocOrDie(sizeof(char) * nbytes);
-  sprintf(result, "frag%d", fragnum);
-  return result;
-}
diff --git a/squid/seqstat_main.c b/squid/seqstat_main.c
deleted file mode 100644
index bef93db..0000000
--- a/squid/seqstat_main.c
+++ /dev/null
@@ -1,238 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* seqstat_main.c
- * Wed Aug 10 15:47:14 1994
- *
- * Look at a sequence file, determine some simple statistics.
- * CVS $Id: seqstat_main.c,v 1.12 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <ctype.h>
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "seqstat - show some simple statistics on a sequence file"; /* handed to SqdBanner() */
-
-static char usage[] = "\
-Usage: seqstat [-options] <seqfile>\n\
- Available options:\n\
- -a : report per-sequence info, not just a summary\n\
- -h : help; display usage and version\n\
-"; /* passed to Getopt(); printed for -h and when the argument count is wrong */
-
-static char experts[] = "\
- --gccomp : with -a, include GC composition in report (DNA/RNA only)\n\
- --informat <s> : specify sequence file format <s>\n\
- --quiet : suppress verbose header (used in regression testing)\n\
-"; /* long-option help; printed after usage for -h only */
-
-static struct opt_s OPTIONS[] = {
- { "-a", TRUE, sqdARG_NONE },
- { "-h", TRUE, sqdARG_NONE },
- { "--gccomp", FALSE, sqdARG_NONE },
- { "--informat", FALSE, sqdARG_STRING },
- { "--quiet", FALSE, sqdARG_NONE },
-}; /* option name, TRUE for the one-letter options / FALSE for the long ones, argument type */
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) /* number of entries in OPTIONS[] */
-
-static float gc_composition(char *seq); /* fractional GC content of one sequence */
-
-/* main() for seqstat
- *
- * Reads every sequence of <seqfile> and prints summary statistics:
- * file format, type of the first sequence, number of sequences,
- * total residues, smallest and largest length, and average length.
- * With -a, one line per sequence is printed first (name, length,
- * optionally GC fraction with --gccomp, description).
- *
- * For a file that yields no sequences, the average length is now
- * reported as 0.0; it used to be computed as 0/0. Smallest and
- * Largest keep their "nothing seen" value of -1 in that case.
- *
- * Returns 0; fatal problems are reported through Die().
- */
-int
-main(int argc, char **argv)
-{
-  char *seqfile; /* name of sequence file */
-  SQFILE *dbfp; /* open sequence file */
-  int fmt; /* format of seqfile */
-  char *seq; /* sequence */
-  SQINFO sqinfo; /* extra info about sequence */
-  int nseqs;
-  long long small; /* smallest length */
-  long long large; /* largest length */
-  long long total; /* total length */
-  int type; /* kAmino, kDNA, kRNA, or kOtherSeq */
-
-  int allreport; /* TRUE to do a short table for each sequence */
-  int be_quiet; /* TRUE to suppress header */
-  int do_gccomp; /* TRUE to include GC composition in per-seq report */
-  float gc; /* fractional gc composition, 0..1 */
-
-  char *optname;
-  char *optarg;
-  int optind;
-
-  /***********************************************
-   * Parse command line
-   ***********************************************/
-
-  fmt = SQFILE_UNKNOWN; /* default: autodetect format */
-  allreport = FALSE; /* default: file summary only */
-  be_quiet = FALSE; /* show header info by default */
-  type = kOtherSeq; /* just to silence gcc uninit warning */
-  do_gccomp = FALSE;
-  gc = 0.; /* only recomputed when --gccomp is given */
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-		&optind, &optname, &optarg))
-    {
-      if (strcmp(optname, "-a") == 0) allreport = TRUE;
-      else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE;
-      else if (strcmp(optname, "--gccomp") == 0) do_gccomp = TRUE;
-
-      else if (strcmp(optname, "--informat") == 0) {
-	fmt = String2SeqfileFormat(optarg);
-	if (fmt == SQFILE_UNKNOWN)
-	  Die("unrecognized sequence file format \"%s\"", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0) {
-	SqdBanner(stdout, banner);
-	puts(usage);
-	puts(experts);
-	exit(EXIT_SUCCESS);
-      }
-    }
-
-  if (argc - optind != 1) Die("%s\n", usage);
-  seqfile = argv[argc-1];
-
-  if (! be_quiet) SqdBanner(stdout, banner);
-
-  /* Try to work around inability to autodetect from a pipe or .gz:
-   * assume FASTA format
-   */
-  if (fmt == SQFILE_UNKNOWN &&
-      (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
-    fmt = SQFILE_FASTA;
-
-  /***********************************************
-   * Read the file.
-   ***********************************************/
-
-  if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
-    Die("Failed to open sequence file %s for reading", seqfile);
-
-  if (allreport) {
-    printf(" %-15s %-5s %s%s\n", " NAME", "LEN",
-	   do_gccomp? " f_GC " : "",
-	   "DESCRIPTION");
-    printf(" --------------- ----- %s-----------\n",
-	   do_gccomp ? "----- " : "");
-  }
-
-  nseqs = 0;
-  small = -1;	/* -1: no sequence seen yet */
-  large = -1;
-  total = 0L;
-  while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
-    {
-      if (nseqs == 0) type = Seqtype(seq);
-      if (do_gccomp) gc = gc_composition(seq);
-
-      if (allreport) {
-	if (do_gccomp) {
-	  printf("* %-15s %5d %.3f %-50.50s\n", sqinfo.name, sqinfo.len,
-		 gc,
-		 sqinfo.flags & SQINFO_DESC ? sqinfo.desc : "");
-	} else {
-	  printf("* %-15s %5d %-50.50s\n", sqinfo.name, sqinfo.len,
-		 sqinfo.flags & SQINFO_DESC ? sqinfo.desc : "");
-	}
-      }
-
-      if (small == -1 || sqinfo.len < small) small = (long long) sqinfo.len;
-      if (large == -1 || sqinfo.len > large) large = (long long) sqinfo.len;
-      total += (long long) sqinfo.len;
-      nseqs++;
-      FreeSequence(seq, &sqinfo);
-    }
-  if (allreport) puts("");
-
-  printf("Format: %s\n", SeqfileFormat2String(dbfp->format));
-  printf("Type (of 1st seq): ");
-  switch (type)
-    {
-    case kDNA: puts("DNA"); break;
-    case kRNA: puts("RNA"); break;
-    case kAmino: puts("Protein"); break;
-    case kOtherSeq: puts("Unknown"); break;
-    default: Die("oops.");
-    }
-  printf("Number of sequences: %d\n", nseqs);
-  printf("Total # residues: %lld\n", total);
-  printf("Smallest: %lld\n", small);
-  printf("Largest: %lld\n", large);
-  /* avoid 0/0 when the file held no sequences */
-  printf("Average length: %.1f\n",
-	 nseqs > 0 ? (float) total / (float) nseqs : 0.0);
-
-  SeqfileClose(dbfp);
-
-  return 0;
-}
-
-
-/* Function: gc_composition()
- * Date:     SRE, Mon Apr 23 10:01:48 2001 [St. Louis]
- *
- * Purpose:  Calculate the fractional GC composition of
- *           an input RNA or DNA sequence. Deals appropriately
- *           with IUPAC degeneracy: each degenerate symbol adds the
- *           fraction of its possible bases that are G or C.
- *           Case-insensitive. Ignores gap symbols. Other unexpected
- *           characters make it die with an error (protein, for
- *           instance).
- *
- *           The gap test is applied to the current character. It
- *           used to test <c> before <c> had been assigned, i.e. an
- *           uninitialised value on the first pass and the previous
- *           character afterwards.
- *
- * Args:     seq - the DNA or RNA sequence
- *
- * Returns:  fractional GC composition, 0-1. Returns 0 for an empty
- *           or all-gap sequence instead of dividing by zero.
- */
-static float
-gc_composition(char *seq)
-{
-  int c;
-  float total;
-  float gc;
-
-  gc = total = 0.;
-  for (; *seq != '\0'; seq++)
-    {
-      if (isgap(*seq)) continue;
-
-      /* ctype functions require an unsigned char value */
-      c = toupper((unsigned char) *seq);
-      total += 1.0;
-
-      switch (c) {
-      case 'C':
-      case 'G':
-      case 'S': gc += 1.0; break;	/* S = C or G */
-
-      case 'A':
-      case 'T':
-      case 'U':
-      case 'W': gc += 0.0; break;	/* W = A or T */
-
-      case 'N':
-      case 'R':
-      case 'Y':
-      case 'M':
-      case 'K': gc += 0.5; break;	/* half of the possible bases are G/C */
-
-      case 'H':
-      case 'D': gc += 0.3333; break;	/* one of three */
-
-      case 'B':
-      case 'V': gc += 0.6667; break;	/* two of three */
-
-      default:
-	Die("unrecognized nucleic acid character %c in sequence", c);
-      }
-    }
-  if (total == 0.) return 0.;	/* nothing counted: avoid 0/0 */
-  return (gc/total);
-}
diff --git a/squid/sfetch_main.c b/squid/sfetch_main.c
deleted file mode 100644
index 5e32960..0000000
--- a/squid/sfetch_main.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sfetch_main.c, Fri Dec 25 14:22:17 1992, SRE
- *
- * sfetch -- a program to extract subsequences from a sequence database
- * Renamed from "getseq" SRE, Tue Jan 19 10:47:42 1999 (GCG clash)
- *
- * CVS $Id: sfetch_main.c,v 1.17 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-#include "ssi.h"
-
-static char banner[] = "sfetch - retrieve a specified sequence from a file"; /* one-line description; passed to SqdBanner() for -h */
-
-static char usage[] = "\
-Usage: sfetch [-options] <seqname>\n\
- or: sfetch [-options] .\n\
- (The second version fetches the first seq in the file.)\n\
- Get a sequence from a database.\n\
- Available options:\n\
- -a : name is an accession number, not a key\n\
- -d <seqfile> : get sequence from <seqfile>\n\
- -D <database> : instead, get sequence from main database\n\
- -h : help; print version and usage info\n\
- -r <newname> : rename the fragment <newname>\n\
- -f <from> : from which residue (1..N)\n\
- -t <to> : to which residue (1..N)\n\
- -o <outfile> : direct output to <outfile>\n\
- -F <format> : use output format of <format>; see below for\n\
- list. Default is original format of database.\n\
-\n\
- Available output formats include:\n\
- fasta\n\
- genbank\n\
- embl\n\
- gcg\n\
- pir\n\
- raw\n\n\
- Available databases are: (if $env variables are set correctly)\n\
- -Dsw $SWDIR SwissProt\n\
- -Dpir $PIRDIR PIR\n\
- -Dem $EMBLDIR EMBL\n\
- -Dgb $GBDIR GenBank\n\
- -Dwp $WORMDIR WormPep\n\
- -Dowl $OWLDIR OWL\n"; /* usage text: printed by -h and appended to most command-line error messages in main() */
-
-static char experts[] = "\
- --informat <s> : specify input sequence file format <s>\n\
-"; /* expert options; main() prints this right after usage for -h */
-
-static struct opt_s OPTIONS[] = { /* option table handed to Getopt(): name, a TRUE/FALSE flag, argument type */
- { "-a", TRUE, sqdARG_NONE },
- { "-d", TRUE, sqdARG_STRING },
- { "-f", TRUE, sqdARG_INT },
- { "-h", TRUE, sqdARG_NONE },
- { "-o", TRUE, sqdARG_STRING },
- { "-r", TRUE, sqdARG_STRING },
- { "-t", TRUE, sqdARG_INT },
- { "-D", TRUE, sqdARG_STRING },
- { "-F", TRUE, sqdARG_STRING },
- { "--informat", FALSE, sqdARG_STRING }, /* the only FALSE entry -- presumably the flag marks one-letter options; confirm against struct opt_s in squid.h */
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s)) /* number of entries in OPTIONS[] */
-
-/* dbenv maps command line database selection (-D) to an environment
- * variable, from which the database directory is obtained, plus the SSI
- * index file name and the entry terminator main() uses when copying a raw entry.
- */
-static struct dbenv_s {
- char *dbname; /* name of database, as used on command line */
- char *ssiname; /* name of SSI index file to look for */
- char *envname; /* environment var to get directory path from*/
- char *entryend; /* string signifying end of entry (matched as a line prefix) */
- int addend; /* TRUE if entryend line is part of entry */
-} dbenv[] =
-{
- { "sw", "swiss.ssi", "SWDIR", "//", TRUE},
- { "pir", "pir.ssi", "PIRDIR", "///", TRUE},
- { "em", "embl.ssi", "EMBLDIR", "//", TRUE},
- { "gb", "genbank.ssi","GBDIR", "//", TRUE},
- { "wp", "wormpep.ssi","WORMDIR", ">", FALSE},
- { "owl", "owl.ssi", "OWLDIR", ">", FALSE}, /* use FASTA OWL version */
-};
-#define NUMDBS (sizeof(dbenv) / sizeof(struct dbenv_s)) /* number of rows in dbenv[] */
-
-/* main() for sfetch: parse the command line, locate the requested
- * sequence (through an SSI index when one is available, otherwise by
- * scanning the file), optionally cut out a subsequence -- reverse
- * complemented when <from> exceeds <to> -- and write it out.
- */
-int
-main(int argc, char **argv)
-{
-  char  *dbname;                /* master database to search */
-  char  *seqfile;               /* name of sequence file to read */
-  char  *ssifile;               /* name of SSI index file (if one exists) */
-  SQFILE *seqfp;                /* pointer to open sequence file */
-  char  *getname;               /* name of sequence to get from */
-  int    from;                  /* starting residue, 1..N */
-  int    to;                    /* ending residue, 1..N */
-  char  *outfile;               /* name of file to put output to */
-  FILE  *outfp;                 /* file pointer to put output to */
-  int    format;                /* format of seqfile */
-  int    outfmt;                /* output format */
-  char  *seq;                   /* current working sequence */
-  SQINFO sqinfo;
-  char  *frag;                  /* extracted subsequence */
-  int    source_start;          /* start of seq on original source 1..N */
-  int    source_stop;           /* end of seq on original source 1..N */
-  int    source_orient;         /* sign of parent: -1 revcomp, +1 normal*/
-  char  *ss;                    /* secondary structure representation */
-
-  SSIFILE  *ssi;                /* open SSI index file */
-  SSIOFFSET ssi_offset;         /* disk offset for locating sequence */
-  int       used_ssi;           /* TRUE if SSI file was used (don't scan) */
-  int       status;             /* status returned by an SSI call */
-
-  char *rename;                 /* new name to give fragment */
-  int   reverse_complement;     /* do we have to reverse complement? */
-  int   getall;                 /* TRUE unless -f or -t was given */
-  int   getfirst;               /* TRUE to extract from the first seq, w/o looking at name */
-  char *outformat;              /* output format string */
-  int   by_accession;           /* TRUE if name is accession number not key */
-
-  int   dbidx;                  /* index into dbenv[]; stays -1 when no main database is used */
-
-  char *optname;
-  char *optarg;
-  int   optind;
-
-  /***********************************************
-   * Parse the command line
-   ***********************************************/
-
-  /* initializations and defaults */
-  format  = SQFILE_UNKNOWN;     /* autodetect default, overridden by --informat or SSI files */
-  reverse_complement = 0;
-  getall  = TRUE;
-  getfirst= FALSE;
-  dbname  = NULL;
-  dbidx   = -1;
-  seqfile = NULL;
-  from    = -1;
-  to      = -1;                 /* flag that says do the whole thing */
-  outfile = NULL;
-  getname = NULL;
-  rename  = NULL;
-  outformat = NULL;
-  by_accession = FALSE;
-  used_ssi = FALSE;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-                &optind, &optname, &optarg))
-    {
-      if      (strcmp(optname, "-a") == 0) { by_accession = TRUE; }
-      else if (strcmp(optname, "-d") == 0) { seqfile = optarg; }
-      else if (strcmp(optname, "-f") == 0) {
-        from = atoi(optarg); getall = FALSE;
-      }
-      else if (strcmp(optname, "-t") == 0) {
-        to = atoi(optarg); getall = FALSE;
-      }
-      else if (strcmp(optname, "-r") == 0) { rename = optarg; }
-      else if (strcmp(optname, "-o") == 0) { outfile = optarg; }
-      else if (strcmp(optname, "-D") == 0) { dbname = optarg; }
-      else if (strcmp(optname, "-F") == 0) { outformat = optarg; }
-      else if (strcmp(optname, "--informat") == 0) {
-        format = String2SeqfileFormat(optarg);
-        if (format == SQFILE_UNKNOWN)
-          Die("unrecognized input sequence file format \"%s\"", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0) {
-        SqdBanner(stdout, banner);
-        puts(usage);
-        puts(experts);
-        exit(EXIT_SUCCESS);
-      }
-    }
-
-  if (argc - optind != 1)
-    Die("Incorrect number of command line arguments.\n%s\n", usage);
-
-  getname = argv[optind];
-  if (strcmp(getname, ".") == 0) getfirst = TRUE;
-
-  if (getfirst && seqfile == NULL)
-    Die("You need to specify -d <seqfile> to retrieve a first sequence.\n%s",
-        usage);
-
-  /* Try to work around inability to autodetect from a pipe or .gz:
-   * assume FASTA format */
-  if (seqfile != NULL &&
-      format == SQFILE_UNKNOWN &&
-      (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
-    format = SQFILE_FASTA;
-
-  /***********************************************
-   * Get name of file to look through, and disk offset,
-   * using SSI file if one exists. Three possibilities:
-   *    1) Look in main DB, which has SSI index in the directory
-   *    2) Look in a file, which has associated SSI index
-   *    3) Look in an unindexed file
-   ***********************************************/
-
-  if (dbname != NULL && seqfile != NULL)
-    Die("Can't fetch from *both* a database %s and a file %s\n%s",
-        dbname, seqfile, usage);
-  if (dbname == NULL && seqfile == NULL)
-    {                           /* try to guess SwissProt, stupidly, but usually works */
-      if (strchr(getname, '_') != NULL) dbname = Strdup("sw");
-      else Die("You have to specify either a database or a seqfile\n%s", usage);
-    }
-
-  if (dbname != NULL)           /* Main database. SSI index mandatory. */
-    {
-      char *dbdir;
-      char *dbfile;
-      int   fh;
-                                /* find which db this is */
-      for (dbidx = 0; dbidx < NUMDBS; dbidx++)
-        if (strcmp(dbenv[dbidx].dbname, dbname) == 0)
-          break;
-      if (dbidx == NUMDBS)
-        Die("No such main database %s\n%s", dbname, usage);
-
-                                /* get directory name */
-      if ((dbdir = getenv(dbenv[dbidx].envname)) == NULL)
-        Die("Environment variable %s is not set.\n%s",
-            dbenv[dbidx].envname, usage);
-                                /* open ssi file */
-      ssifile = (char *) MallocOrDie
-        ((strlen(dbdir) + strlen(dbenv[dbidx].ssiname) + 2) * sizeof(char));
-      sprintf(ssifile, "%s/%s", dbdir, dbenv[dbidx].ssiname);
-      if ((status = SSIOpen(ssifile, &ssi)) != 0)
-        Die("Failed to open SSI index file %s in directory %s\n%s",
-            dbenv[dbidx].ssiname, dbdir, usage);
-                                /* get seqfile name, file format, and offset */
-      if ((status = SSIGetOffsetByName(ssi, getname, &fh, &ssi_offset)) != 0)
-        Die("Failed to find key %s in SSI file %s", getname, ssifile);
-      if ((status = SSIFileInfo(ssi, fh, &dbfile, &format)) != 0)
-        Die("SSI error: %s", SSIErrorString(status));
-      free(ssifile);
-                                /* set up proper seqfile, with path */
-      seqfile = (char *) MallocOrDie
-        ((strlen(dbdir) + strlen(dbfile) + 2) * sizeof(char));
-      sprintf(seqfile, "%s/%s", dbdir, dbfile);
-      used_ssi = TRUE;
-      SSIClose(ssi);
-    }
-  else if (! getfirst)          /* Sequence file. SSI index optional. */
-    {
-      char *dbfile;
-      int   fh;
-
-      ssifile = (char *) MallocOrDie ((strlen(seqfile) + 5) * sizeof(char));
-      sprintf(ssifile, "%s.ssi", seqfile);
-      if ((status = SSIOpen(ssifile, &ssi)) == 0)
-        {
-          SQD_DPRINTF1(("Opened SSI index %s...\n", ssifile));
-          if ((status = SSIGetOffsetByName(ssi, getname, &fh, &ssi_offset)) != 0)
-            Die("Failed to find key %s in SSI file %s", getname, ssifile);
-          if ((status = SSIFileInfo(ssi, fh, &dbfile, &format)) != 0)
-            Die("SSI error: %s", SSIErrorString(status));
-
-          /* Set up seqfile name - possibly replacing
-             what the user gave us in -d, because she may
-             have been referring to an SSI file that
-             indexes multiple sequence files.
-             ... but be careful we preserve the path! */
-          if ((seqfile = FileSameDirectory(ssifile, dbfile)) == NULL)
-            Die("SSI file %s and dbfile %s are in different locations?!",
-                ssifile, dbfile);
-          SSIClose(ssi);
-          used_ssi = TRUE;
-        }
-      free(ssifile);
-    }
-
-  /***********************************************
-   * Open database file
-   ***********************************************/
-
-  if ((seqfp = SeqfileOpen(seqfile, format, NULL)) == NULL)
-    Die("Failed to open sequence database file %s\n%s\n", seqfile, usage);
-  if (used_ssi)
-    SeqfilePosition(seqfp, &ssi_offset);
-
-  /***********************************************
-   * Open output file
-   ***********************************************/
-
-  /* Determine output format. Default: use same as input. Override: -F option. */
-  outfmt = seqfp->format;
-  if (outformat != NULL)
-    {
-      outfmt = String2SeqfileFormat(outformat);
-      if (outfmt == SQFILE_UNKNOWN)
-        Die("Unknown output format %s\n%s", outformat, usage);
-      if (IsAlignmentFormat(outfmt))
-        Die("Can't output a single sequence in an alignment format (%s)\n", outformat);
-    }
-                                /* open output file for writing; use stdout by default */
-  if (outfile == NULL) outfp = stdout;
-  else if ((outfp = fopen(outfile, "w")) == NULL)
-    Die("cannot open %s for output\n", outfile);
-
-  /***********************************************
-   * Main loop
-   ***********************************************/
-
-  /* If this is a simple fetch of the complete sequence
-   * in native format, and we've been positioned in the file
-   * by an SSI index file, we can just read right from the file,
-   * partially bypassing the ReadSeq() API, and probably
-   * putting our fingers a little too deep into the seqfp object.
-   */
-  if (getall && used_ssi && outfmt == format && dbname != NULL)
-    {
-      char *buf    = NULL;
-      int   buflen = 0;
-      int   endlen;
-
-      if (dbidx == -1) Die("That's weird. No database index available.");
-      endlen = strlen(dbenv[dbidx].entryend);
-      fputs(seqfp->buf, outfp); /* always do first line */
-      /* fputs("\n", outfp); */ /* buf has its /n */
-      while (sre_fgets(&buf, &buflen, seqfp->f) != NULL)
-        {
-          if (strncmp(buf, dbenv[dbidx].entryend, endlen) == 0)
-            {
-              if (dbenv[dbidx].addend) fputs(buf, outfp);
-              break;
-            }
-          fputs(buf, outfp);
-        }
-      if (buf != NULL) free(buf);
-    }
-  else                          /* else, the hard way with ReadSeq */
-    {
-      seq  = NULL;
-      frag = NULL;
-
-      while (ReadSeq(seqfp, format, &seq, &sqinfo))
-        {
-          if (used_ssi)         /* SSI file puts us right on our seq. */
-            break;
-          else if (getfirst)    /* Use the first seq in the file. */
-            break;
-          else if (by_accession &&
-                   (sqinfo.flags & SQINFO_ACC) &&
-                   strcmp(sqinfo.acc, getname) == 0)
-            break;
-          else if (strcmp(sqinfo.name, getname) == 0)
-            break;
-
-          FreeSequence(seq, &sqinfo);
-          seq = NULL;
-        }
-
-      if (seq == NULL)
-        Die("failed to extract the subsequence %s\n%s", getname, usage);
-
-      if (getall)
-        {
-          from = 1;
-          to   = sqinfo.len;
-        }
-      else if (from == -1) from = 1;
-      else if (to   == -1) to   = sqinfo.len;
-
-      if (to > sqinfo.len || from > sqinfo.len)
-        Warn("Extracting beyond the length of the sequence");
-      if (to < 1 || from < 1)
-        Warn("Extracting beyond the beginning of the sequence");
-
-                                /* check for reverse complement */
-      if (to != -1 && from > to)
-        {
-          int swapfoo;          /* temp variable for swapping coords */
-
-          reverse_complement = TRUE;
-          swapfoo = from; from = to; to = swapfoo;
-        }
-      if (to > sqinfo.len) to = sqinfo.len;
-      if (from < 1) from = 1;
-      if (from > to)            /* nothing left after clamping: sizes below would be <= 0 */
-        Die("Requested range does not overlap sequence %s (length %d)", sqinfo.name, sqinfo.len);
-
-      if ((frag = (char *) calloc (to-from+2, sizeof(char))) == NULL)
-        Die("memory error\n");
-
-      strncpy(frag, seq+from-1, to-from+1);  /* frag came zeroed from calloc, so it stays NUL-terminated */
-
-      if (sqinfo.flags & SQINFO_SS)
-        {
-          if ((ss = (char *) calloc (to-from+2, sizeof(char))) == NULL)
-            Die("memory error\n");
-          strncpy(ss, sqinfo.ss+from-1, to-from+1);
-          free(sqinfo.ss);
-          sqinfo.ss = ss;
-        }
-
-      if (reverse_complement)
-        {
-          char *revfrag;        /* temp variable for reverse complement */
-          int   swapfoo;        /* temp variable for swapping coords back */
-
-          if ((revfrag = calloc ( to-from+2, sizeof(char))) == NULL)
-            Die("memory failure\n");
-          revcomp(revfrag, frag);
-          free(frag);
-          frag = revfrag;
-          swapfoo = from; from = to; to = swapfoo;
-
-          /* reverse complement nullifies secondary structure */
-          if (sqinfo.flags & SQINFO_SS)
-            { free(sqinfo.ss); sqinfo.flags &= ~SQINFO_SS; }
-        }
-
-      if (! (sqinfo.flags & SQINFO_ID))
-        SetSeqinfoString(&sqinfo, sqinfo.name, SQINFO_ID);
-
-      if (! (sqinfo.flags & SQINFO_OLEN))
-        { sqinfo.olen = sqinfo.len; sqinfo.flags |= SQINFO_OLEN; }
-
-      sqinfo.len = (to > from) ? to-from+1 : from-to+1;
-      sqinfo.flags |= SQINFO_LEN;
-
-      if (rename != NULL)
-        SetSeqinfoString(&sqinfo, rename, SQINFO_NAME);
-
-      source_start = (sqinfo.flags & SQINFO_START) ? sqinfo.start : 1;
-      source_stop  = (sqinfo.flags & SQINFO_STOP)  ? sqinfo.stop  : sqinfo.len;
-      source_orient= (source_stop >= source_start) ? 1 : -1;  /* >=: start == stop (e.g. a 1-residue fragment) is forward, not revcomp */
-
-      sqinfo.start = source_start + (from- 1) * source_orient;
-      sqinfo.stop  = source_start + (to  - 1) * source_orient;
-      sqinfo.flags |= SQINFO_START | SQINFO_STOP;
-
-      WriteSeq(outfp, outfmt, frag, &sqinfo);
-      free(frag);
-      FreeSequence(seq, &sqinfo);
-    }
-
-  if (outfile != NULL)
-    printf("Fragment written to file %s\n", outfile);
-
-  SeqfileClose(seqfp);
-  fclose(outfp);
-  return(0);
-}
diff --git a/squid/shuffle.c b/squid/shuffle.c
deleted file mode 100644
index f3b4a4c..0000000
--- a/squid/shuffle.c
+++ /dev/null
@@ -1,641 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* shuffle.c
- *
- * Routines for randomizing sequences.
- *
- * All routines are alphabet-independent (DNA, protein, RNA, whatever);
- * they assume that input strings are purely alphabetical [a-zA-Z], and
- * will return strings in all upper case [A-Z].
- *
- * All return 1 on success, and 0 on failure; 0 status invariably
- * means the input string was not alphabetical.
- *
- * StrShuffle() - shuffled string, preserve mono-symbol composition.
- * StrDPShuffle() - shuffled string, preserve mono- and di-symbol composition.
- *
- * StrMarkov0() - random string, same zeroth order Markov properties.
- * StrMarkov1() - random string, same first order Markov properties.
- *
- * StrReverse() - simple reversal of string
- * StrRegionalShuffle() - mono-symbol shuffled string in regional windows
- *
- * There are also similar routines for shuffling alignments:
- *
- * AlignmentShuffle() - alignment version of StrShuffle().
- * AlignmentBootstrap() - sample with replacement; a bootstrap dataset.
- * QRNAShuffle() - shuffle a pairwise alignment, preserving all gap positions.
- *
- * CVS $Id: shuffle.c,v 1.8 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <string.h>
-#include <ctype.h>
-
-#include "squid.h"
-#include "sre_random.h"
-
-/* Function: StrShuffle()
- *
- * Purpose:  Randomly permute the characters of s2 into s1.
- *           (Pass the same pointer twice to shuffle in place.)
- *
- * Args:     s1 - caller-allocated space for the shuffled string.
- *           s2 - string to shuffle.
- *
- * Return:   1 on success.
- */
-int
-StrShuffle(char *s1, char *s2)
-{
-  int  n, pick;                 /* n = count of leading chars not yet fixed */
-  char tmp;
-
-  if (s1 != s2) strcpy(s1, s2);
-  n = strlen(s1);
-  while (n > 1)
-    {
-      pick     = CHOOSE(n);     /* random unfixed char goes to slot n-1 */
-      tmp      = s1[pick];
-      s1[pick] = s1[n-1];
-      s1[--n]  = tmp;           /* slot n-1 is now fixed; shrink the pool */
-    }
-  return 1;
-}
-
-/* Function: StrDPShuffle()
- * Date:     SRE, Fri Oct 29 09:15:17 1999 [St. Louis]
- *
- * Purpose:  Returns a shuffled version of s2, in s1.
- *           (s1 and s2 may be identical; i.e. a string
- *           may be shuffled in place.) The shuffle is a
- *           "doublet-preserving" (DP) shuffle. Both
- *           mono- and di-symbol composition are preserved.
- *
- *           Done by searching for a random Eulerian
- *           walk on a directed multigraph.
- *           Reference: S.F. Altschul and B.W. Erickson, Mol. Biol.
- *           Evol. 2:526-538, 1985. Quoted bits in my comments
- *           are from Altschul's outline of the algorithm.
- *
- * Args:     s1   - RETURN: the string after it's been shuffled
- *                    (space for s1 allocated by caller)
- *           s2   - the string to be shuffled
- *
- * Returns:  0 if string can't be shuffled (it's not all [a-zA-Z]
- *             alphabetic).
- *           1 on success (a string shorter than 2 is just copied, upper-cased).
- */
-int
-StrDPShuffle(char *s1, char *s2)
-{
-  int    len;
-  int    pos;                   /* a position in s1 or s2 */
-  int    x,y;                   /* indices of two characters */
-  char **E;                     /* edge lists: E[0] is the edge list from vertex A */
-  int   *nE;                    /* lengths of edge lists */
-  int   *iE;                    /* positions in edge lists */
-  int    n;                     /* tmp: remaining length of an edge list to be shuffled */
-  char   sf;                    /* last character in s2 */
-  char   Z[26];                 /* connectivity in last edge graph Z */
-  int    keep_connecting;       /* flag used in Z connectivity algorithm */
-  int    is_eulerian;           /* flag used for when we've got a good Z */
-
-  /* First, verify that the string is entirely alphabetic. */
-  len = strlen(s2);
-  for (pos = 0; pos < len; pos++)
-    if (! isalpha((int) s2[pos])) return 0;
-  if (len < 2) {                /* no doublets: the graph code below would index out of bounds */
-    if (len == 1) s1[0] = toupper((int) s2[0]);
-    s1[len] = '\0';
-    return 1;
-  }
-
-  /* "(1) Construct the doublet graph G and edge ordering E
-   *      corresponding to S."
-   *
-   * Note that these also imply the graph G; and note,
-   * for any list x with nE[x] = 0, vertex x is not part
-   * of G.
-   */
-  E  = MallocOrDie(sizeof(char *) * 26);
-  nE = MallocOrDie(sizeof(int)    * 26);
-  for (x = 0; x < 26; x++)
-    {
-      E[x]  = MallocOrDie(sizeof(char) * (len-1));
-      nE[x] = 0;
-    }
-
-  x = toupper((int) s2[0]) - 'A';
-  for (pos = 1; pos < len; pos++)
-    {
-      y = toupper((int) s2[pos]) - 'A';
-      E[x][nE[x]] = y;
-      nE[x]++;
-      x = y;
-    }
-
-  /* Now we have to find a random Eulerian edge ordering. */
-  sf = toupper((int) s2[len-1]) - 'A';
-  is_eulerian = 0;
-  while (! is_eulerian)
-    {
-      /* "(2) For each vertex s in G except s_f, randomly select
-       *      one edge from the s edge list of E(S) to be the
-       *      last edge of the s list in a new edge ordering."
-       *
-       * select random edges and move them to the end of each
-       * edge list.
-       */
-      for (x = 0; x < 26; x++)
-        {
-          if (nE[x] == 0 || x == sf) continue;
-
-          pos           = CHOOSE(nE[x]);
-          y             = E[x][pos];
-          E[x][pos]     = E[x][nE[x]-1];
-          E[x][nE[x]-1] = y;
-        }
-
-      /* "(3) From this last set of edges, construct the last-edge
-       *      graph Z and determine whether or not all of its
-       *      vertices are connected to s_f."
-       *
-       * a probably stupid algorithm for looking at the
-       * connectivity in Z: iteratively sweep through the
-       * edges in Z, and build up an array (confusingly called Z[x])
-       * whose elements are 1 if x is connected to sf, else 0.
-       */
-      for (x = 0; x < 26; x++) Z[x] = 0;
-      Z[(int) sf] = keep_connecting = 1;
-
-      while (keep_connecting) {
-        keep_connecting = 0;
-        for (x = 0; x < 26; x++)
-          {
-            if (nE[x] == 0) continue;     /* x is not a vertex of G: it has no last edge to read */
-            y = E[x][nE[x]-1];            /* xy is an edge in Z */
-            if (Z[x] == 0 && Z[y] == 1)   /* x is connected to sf in Z */
-              {
-                Z[x] = 1;
-                keep_connecting = 1;
-              }
-          }
-      }
-
-      /* if any vertex in Z is tagged with a 0, it's
-       * not connected to sf, and we won't have a Eulerian
-       * walk.
-       */
-      is_eulerian = 1;
-      for (x = 0; x < 26; x++)
-        {
-          if (nE[x] == 0 || x == sf) continue;
-          if (Z[x] == 0) {
-            is_eulerian = 0;
-            break;
-          }
-        }
-
-      /* "(4) If any vertex is not connected in Z to s_f, the
-       *      new edge ordering will not be Eulerian, so return to
-       *      (2). If all vertices are connected in Z to s_f,
-       *      the new edge ordering will be Eulerian, so
-       *      continue to (5)."
-       *
-       * e.g. note infinite loop while is_eulerian is FALSE.
-       */
-    }
-
-  /* "(5) For each vertex s in G, randomly permute the remaining
-   *      edges of the s edge list of E(S) to generate the s
-   *      edge list of the new edge ordering E(S')."
-   *
-   * Essentially a StrShuffle() on the remaining nE[x]-1 elements
-   * of each edge list; unfortunately our edge lists are arrays,
-   * not strings, so we can't just call out to StrShuffle().
-   */
-  for (x = 0; x < 26; x++)
-    for (n = nE[x] - 1; n > 1; n--)
-      {
-        pos       = CHOOSE(n);
-        y         = E[x][pos];
-        E[x][pos] = E[x][n-1];
-        E[x][n-1] = y;
-      }
-
-  /* "(6) Construct sequence S', a random DP permutation of
-   *      S, from E(S') as follows. Start at the s_1 edge list.
-   *      At each s_i edge list, add s_i to S', delete the
-   *      first edge s_i,s_j of the edge list, and move to
-   *      the s_j edge list. Continue this process until
-   *      all edge lists are exhausted."
-   */
-  iE = MallocOrDie(sizeof(int) * 26);
-  for (x = 0; x < 26; x++) iE[x] = 0;
-
-  pos = 0;
-  x = toupper((int) s2[0]) - 'A';
-  while (1)
-    {
-      s1[pos++] = 'A' + x;      /* add s_i to S' */
-      y = E[x][iE[x]];
-      iE[x]++;                  /* "delete" s_i,s_j from edge list */
-      x = y;                    /* move to s_j edge list. */
-
-      if (iE[x] == nE[x])
-        break;                  /* the edge list is exhausted. */
-    }
-  s1[pos++] = 'A' + sf;
-  s1[pos]   = '\0';
-
-  /* Reality checks. */
-  if (x   != sf)  Die("hey, you didn't end on s_f.");
-  if (pos != len) Die("hey, pos (%d) != len (%d).", pos, len);
-
-  /* Free and return. */
-  Free2DArray((void **) E, 26);
-  free(nE);
-  free(iE);
-  return 1;
-}
-
-
-/* Function: StrMarkov0()
- * Date:     SRE, Fri Oct 29 11:08:31 1999 [St. Louis]
- *
- * Purpose:  Fill s1 with a random string as long as s2, each
- *           symbol drawn independently from the letter
- *           frequencies seen in s2 (zero-th order Markov).
- *
- *           s1 and s2 may be the same pointer, to randomize
- *           s2 in place.
- *
- * Args:     s1 - allocated space for random string
- *           s2 - string to base s1's properties on.
- *
- * Returns:  1 on success; 0 if s2 doesn't look alphabetical.
- */
-int
-StrMarkov0(char *s1, char *s2)
-{
-  float freq[26];               /* per-letter counts, then frequencies; index = letter - 'A' */
-  char *cp;
-  int   n, i;
-
-  /* Bail out unless every character of s2 is a letter.
-   */
-  for (cp = s2; *cp != '\0'; cp++)
-    if (! isalpha((int) *cp)) return 0;
-  n = strlen(s2);
-
-  /* Count each letter, case-folded, then scale counts to frequencies.
-   */
-  FSet(freq, 26, 0.);
-  for (i = 0; i < n; i++)
-    freq[toupper((int) s2[i]) - 'A'] += 1.0;
-  FNorm(freq, 26);
-
-  /* Draw n letters from that distribution.
-   */
-  for (i = 0; i < n; i++)
-    s1[i] = FChoose(freq, 26) + 'A';
-  s1[n] = '\0';
-
-  return 1;
-}
-
-
-/* Function: StrMarkov1()
- * Date:     SRE, Fri Oct 29 11:22:20 1999 [St. Louis]
- *
- * Purpose:  Returns a random string s1 with the same
- *           length and first order Markov properties
- *           as s2. The first symbol of s1 is the first symbol of s2.
- *
- *           s1 and s2 may be identical, to randomize s2
- *           in place. An empty s2 yields an empty s1.
- *
- * Args:     s1 - allocated space for random string
- *           s2 - string to base s1's properties on.
- *
- * Returns:  1 on success; 0 if s2 doesn't look alphabetical.
- */
-int
-StrMarkov1(char *s1, char *s2)
-{
-  int   len;
-  int   pos;
-  int   x,y;
-  int   i;                      /* initial symbol */
-  float p[26][26];              /* symbol probabilities */
-
-  /* First, verify that the string is entirely alphabetic. */
-  len = strlen(s2);
-  for (pos = 0; pos < len; pos++)
-    if (! isalpha((int) s2[pos])) return 0;
-  if (len == 0) { s1[0] = '\0'; return 1; }  /* the code below writes s1[1], past a 1-byte buffer */
-
-  /* Collect first order counts and convert to frequencies.
-   */
-  for (x = 0; x < 26; x++) FSet(p[x], 26, 0.);
-
-  i = x = toupper((int) s2[0]) - 'A';
-  for (pos = 1; pos < len; pos++)
-    {
-      y = toupper((int) s2[pos]) - 'A';
-      p[x][y] += 1.0;
-      x = y;
-    }
-  for (x = 0; x < 26; x++)
-    FNorm(p[x], 26);
-
-  /* Generate a random string using those p's.
-   */
-  x = i;
-  s1[0] = x + 'A';
-  for (pos = 1; pos < len; pos++)
-    {
-      y = FChoose(p[x], 26);
-      s1[pos] = y + 'A';
-      x = y;
-    }
-  s1[pos] = '\0';
-
-  return 1;
-}
-
-
-
-/* Function: StrReverse()
- * Date:     SRE, Thu Nov 20 10:54:52 1997 [St. Louis]
- *
- * Purpose:  Write the characters of s2 into s1 in reverse order.
- *           (s1 and s2 may be the same pointer, to reverse in place.)
- *
- * Args:     s1 - allocated space for reversed string.
- *           s2 - string to reverse.
- *
- * Return:   1.
- */
-int
-StrReverse(char *s1, char *s2)
-{
-  int  len = strlen(s2);
-  int  lo, hi;                  /* indices closing in from the two ends */
-  char tail;
-
-  for (lo = 0, hi = len - 1; lo < hi; lo++, hi--)
-    {
-      tail   = s2[hi];          /* read before writing: s1 may alias s2 */
-      s1[hi] = s2[lo];
-      s1[lo] = tail;
-    }
-  if (lo == hi)
-    s1[lo] = s2[lo];            /* odd length: the middle residue stays where it is */
-  s1[len] = '\0';
-  return 1;
-}
-
-/* Function: StrRegionalShuffle()
- * Date:     SRE, Thu Nov 20 11:02:34 1997 [St. Louis]
- *
- * Purpose:  Returns a regionally shuffled version of s2, in s1.
- *           (s1 and s2 can be identical to regionally
- *           shuffle in place.) See [Pearson88].
- *
- * Args:     s1 - allocated space for regionally shuffled string.
- *           s2 - string to regionally shuffle
- *           w  - window size (typically 10 or 20); w < 1 shuffles nothing
- *
- * Return:   1.
- */
-int
-StrRegionalShuffle(char *s1, char *s2, int w)
-{
-  int  len;
-  char c;
-  int  pos, i, j;
-
-  if (s1 != s2) strcpy(s1, s2);
-  len = strlen(s1);
-  if (w < 1) return 1;          /* i += w would never advance; hand back the plain copy */
-
-  for (i = 0; i < len; i += w)
-    for (j = MIN(len-1, i+w-1); j > i; j--)
-      {
-        pos     = i + CHOOSE(j-i+1);  /* i..j inclusive: s1[j] may stay put (CHOOSE(n) < n, as StrShuffle relies on) */
-        c       = s1[pos];
-        s1[pos] = s1[j];
-        s1[j]   = c;
-      }
-  return 1;
-}
-
-
-/* Function: AlignmentShuffle()
- * Date:     SRE, Sun Apr 22 18:37:15 2001 [St. Louis]
- *
- * Purpose:  Put a column-shuffled copy of ali2 into ali1.
- *           (ali1 and ali2 may be the same pointer, to shuffle
- *           in place.) Whole columns are moved together, so the
- *           % identity within each column is preserved.
- *
- * Args:     ali1 - allocated space for shuffled alignment
- *                  [0..nseq-1][0..alen-1]
- *           ali2 - alignment to be shuffled
- *           nseq - number of sequences in the alignment
- *           alen - length of alignment, in columns.
- *
- * Returns:  1, always.
- */
-int
-AlignmentShuffle(char **ali1, char **ali2, int nseq, int alen)
-{
-  int  row;
-  int  pick;                    /* column swapped into the last unfixed slot */
-  char tmp;
-
-  if (ali1 != ali2)
-    for (row = 0; row < nseq; row++)
-      strcpy(ali1[row], ali2[row]);
-
-  for (row = 0; row < nseq; row++)
-    ali1[row][alen] = '\0';
-
-  while (alen > 1)
-    {
-      pick = CHOOSE(alen);
-      alen--;                   /* alen now indexes the slot being fixed */
-      for (row = 0; row < nseq; row++)
-        {
-          tmp             = ali1[row][pick];
-          ali1[row][pick] = ali1[row][alen];
-          ali1[row][alen] = tmp;
-        }
-    }
-
-  return 1;
-}
-
-/* Function: AlignmentBootstrap()
- * Date:     SRE, Sun Apr 22 18:49:14 2001 [St. Louis]
- *
- * Purpose:  Build a bootstrap sample of ali2 in ali1: each of
- *           the alen output columns is a column of ali2 picked
- *           at random, with replacement.
- *
- *           Unlike the other shuffling routines, ali1 and
- *           ali2 cannot be the same. ali2 is left unchanged.
- *           ali1 must be a properly allocated space for an
- *           alignment the same size as ali2.
- *
- * Args:     ali1 - allocated space for bootstrapped alignment
- *                  [0..nseq-1][0..alen-1]
- *           ali2 - alignment to be bootstrapped
- *           nseq - number of sequences in the alignment
- *           alen - length of alignment, in columns.
- *
- * Returns:  1 on success.
- */
-int
-AlignmentBootstrap(char **ali1, char **ali2, int nseq, int alen)
-{
-  int dst;                      /* output column being filled */
-  int src;                      /* column of ali2 sampled for it */
-  int row;
-
-  for (dst = 0; dst < alen; dst++)
-    {
-      src = CHOOSE(alen);
-      for (row = 0; row < nseq; row++) ali1[row][dst] = ali2[row][src];
-    }
-  /* NUL-terminate every row of the sample. */
-  for (row = 0; row < nseq; row++)
-    ali1[row][alen] = '\0';
-
-  return 1;
-}
-
-
-/* qrna_shuffle_columns(): shuffle the symbols of xs and ys, in step, among the
- * n alignment columns listed in col[]. The order of col[] is not preserved. */
-static void
-qrna_shuffle_columns(char *xs, char *ys, int *col, int n)
-{
-  int  pos, a, b;               /* a = column picked at random, b = last column in the pool */
-  char xsym, ysym;
-  for (; n > 1; n--)
-    {
-      pos   = CHOOSE(n);
-      a     = col[pos];    b     = col[n-1];
-      xsym  = xs[a];       ysym  = ys[a];
-      xs[a] = xs[b];       ys[a] = ys[b];
-      xs[b] = xsym;        ys[b] = ysym;
-      col[pos] = b;        col[n-1] = a;  /* a leaves the pool; b takes its slot */
-    }
-}
-
-/* Function: QRNAShuffle()
- * Date:     SRE, Mon Dec 10 10:14:12 2001 [St. Louis]
- *
- * Purpose:  Shuffle a pairwise alignment x,y while preserving the
- *           position of gaps; return the shuffled alignment in xs,
- *           ys.
- *
- *           Works by doing three separate
- *           shuffles, of (1) columns with residues in both
- *           x and y, (2) columns with residue in x and gap in y,
- *           and (3) columns with gap in x and residue in y.
- *
- *           xs,x and ys,y may be identical: that is, to shuffle
- *           an alignment "in place", destroying the original
- *           alignment, just call:
- *              QRNAShuffle(x,y,x,y);
- *
- * Args:     xs, ys: allocated space for shuffled pairwise ali of x,y [L+1]
- *           x, y: pairwise alignment to be shuffled [0..L-1]
- *
- * Returns:  1 on success, 0 on failure.
- *           The shuffled alignment is returned in xs, ys.
- */
-int
-QRNAShuffle(char *xs, char *ys, char *x, char *y)
-{
-  int  L;
-  int *xycol, *xcol, *ycol;
-  int  nxy, nx, ny;
-  int  i;
-
-  if (xs != x) strcpy(xs, x);
-  if (ys != y) strcpy(ys, y);
-
-  /* First, construct three arrays containing lists of the column positions
-   * of the three types of columns. (If a column contains gaps in both x and y,
-   * we've already simply copied it to the shuffled sequence.)
-   */
-  L = strlen(x);
-  xycol = MallocOrDie(sizeof(int) * L);
-  xcol  = MallocOrDie(sizeof(int) * L);
-  ycol  = MallocOrDie(sizeof(int) * L);
-  nxy = nx = ny = 0;
-
-  for (i = 0; i < L; i++)
-    {
-      if      (isgap(x[i]) && isgap(y[i]))     { continue; }
-      else if (! isgap(x[i]) && ! isgap(y[i])) { xycol[nxy] = i; nxy++; }
-      else if (isgap(x[i]))                    { ycol[ny] = i; ny++; }
-      else if (isgap(y[i]))                    { xcol[nx] = i; nx++; }
-    }
-
-  /* Second, shuffle the sequences indirectly, one column class at a time,
-   * in the same order as before: both-residue, x-only, then y-only. */
-  qrna_shuffle_columns(xs, ys, xycol, nxy);
-  qrna_shuffle_columns(xs, ys, xcol,  nx);
-  qrna_shuffle_columns(xs, ys, ycol,  ny);
-
-  free(xycol); free(xcol); free(ycol);
-  return 1;
-}
-
-
-#ifdef TESTDRIVER
-/* Stand-alone smoke test for the shuffling routines. Build with:
- *    cc -g -o testdriver -DTESTDRIVER -L. shuffle.c -lsquid -lm
- */
-int
-main(int argc, char **argv)
-{
-  char out[100];                /* shuffled output; row x of the pairwise test */
-  char in[100];                 /* input string;    row y of the pairwise test */
-
-  sre_srandom(42);
-  strcpy(in, "GGGGGGGGGGCCCCCCCCCC");
-  /* strcpy(in, "AGACATAAAGTTCCGTACTGCCGGGAT");
-   */
-  StrDPShuffle(out, in);
-  printf("DPshuffle: %s\n", out);
-  StrMarkov0(out, in);
-  printf("Markov 0 : %s\n", out);
-  StrMarkov1(out, in);
-  printf("Markov 1 : %s\n", out);
-
-  strcpy(out, "ACGTACGT--------ACGTACGT----ACGTACGT");
-  strcpy(in,  "ACGTACGTACGTACGT------------ACGTACGT");
-  QRNAShuffle(out, in, out, in);  /* in-place shuffle of the pair */
-  printf("QRNA : %s\n", out);
-  printf(" : %s\n", in);
-
-  return 0;
-}
-#endif
diff --git a/squid/shuffle_main.c b/squid/shuffle_main.c
deleted file mode 100644
index ee95e55..0000000
--- a/squid/shuffle_main.c
+++ /dev/null
@@ -1,336 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* main for shuffle
- *
- * shuffle - generate shuffled sequences
- * Mon Feb 26 16:56:08 1996
- *
- * CVS $Id: shuffle_main.c,v 1.15 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include "squid.h"
-#include "sre_random.h"
-
-/* One-line program description; handed to SqdBanner() when -h is given.
- * NOTE(review): "generated" reads like a typo for "generate" - confirm
- * before touching the string.
- */
-static char banner[] = "shuffle - generated shuffled (or otherwise randomized) sequence";
-
-/* Help text for the everyday options. Besides being printed by -h, it is
- * passed to Getopt() and appended to the command-line error messages.
- */
-static char usage[] = "\
-Usage: shuffle [-options] <seqfile>\n\
- Available options:\n\
- -h : help; print version and usage info\n\
- -n <n> : make <n> samples per input seq (default 1)\n\
- -o <f> : save shuffled sequences to file <f>\n\
- -t <n> : truncate/delete inputs to fixed length <n>\n\
-\n\
- Default: shuffle each input randomly, preserving mono-symbol composition.\n\
- Other choices (exclusive; can't use more than one) :\n\
- -d : shuffle but preserve both mono- and di-symbol composition\n\
- -0 : generate with same 0th order Markov properties as each input\n\
- -1 : generate with same 1st order Markov properties as each input\n\
- -l : make iid sequences of same number and length as inputs\n\
- -r : reverse inputs\n\
- -w <n> : regionally shuffle inputs in window size <n>\n\
- -i : make [-n] iid seqs of length [-t] of type [--dna|--amino];\n\
- when -i is set, no <seqfile> argument is used\n\
-";
-
-/* Help text for the long ("expert") options; printed only by -h.
- * NOTE(review): the --dna line ends in a doubled ')' - looks unintended.
- */
-static char experts[] = "\
- --alignment : <seqfile> is an alignment; shuffle the columns\n\
- --amino : synthesize protein sequences [default] (see -i, -l)\n\
- --dna : synthesize DNA sequences (see -i, -l))\n\
- --informat <s> : specify sequence file format <s>\n\
- --nodesc : remove sequence description lines\n\
- --qrna : <seqfile> is a QRNA/FASTA pairwise alignment file;\n\
- shuffle the pairwise alignments, preserving gap position\n\
- --seed <s> : set random number seed to <s>\n\
-";
-
-/* Table of accepted options, handed to Getopt(). Each entry is
- * { name, flag, argument type }. The flag is TRUE for every single-dash
- * option and FALSE for every double-dash one; presumably it marks
- * single-character options - confirm against struct opt_s.
- */
-static struct opt_s OPTIONS[] = {
- { "-0", TRUE, sqdARG_NONE }, /* 0th order Markov */
- { "-1", TRUE, sqdARG_NONE }, /* 1st order Markov */
- { "-d", TRUE, sqdARG_NONE }, /* digram shuffle */
- { "-h", TRUE, sqdARG_NONE }, /* help */
- { "-i", TRUE, sqdARG_NONE }, /* make iid seq of set length */
- { "-l", TRUE, sqdARG_NONE }, /* make iid seq of same length */
- { "-n", TRUE, sqdARG_INT }, /* number of shuffles per input seq */
- { "-o", TRUE, sqdARG_STRING }, /* file to save to */
- { "-r", TRUE, sqdARG_NONE }, /* reverse seq rather than shuffle */
- { "-t", TRUE, sqdARG_INT }, /* truncation of inputs to fixed len */
- { "-w", TRUE, sqdARG_INT }, /* do regional shuffling */
- { "--alignment",FALSE, sqdARG_NONE }, /* input is alignment; shuff cols */
- { "--amino", FALSE, sqdARG_NONE }, /* make iid protein seqs [default]*/
- { "--dna", FALSE, sqdARG_NONE }, /* make iid DNA seqs */
- { "--informat", FALSE, sqdARG_STRING }, /* specify input seq file format */
- { "--nodesc", FALSE, sqdARG_NONE }, /* remove desc lines */
- { "--qrna", FALSE, sqdARG_NONE }, /* pairwise alignment shuffler */
- { "--seed", FALSE, sqdARG_INT }, /* set the random number seed */
-};
-/* Number of entries in OPTIONS[]. */
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-/* Handles the --alignment mode; defined after main(). */
-static void shuffle_alignment_file(FILE *ofp, char *afile, int fmt);
-
-/* Function: main()
- *
- * Purpose:  Entry point of the shuffle program. Parses the command
- *           line, then runs one of four modes:
- *             -i           synthesize <-n> i.i.d. sequences of length <-t>;
- *             --alignment  shuffle each alignment in <seqfile>;
- *             --qrna       shuffle consecutive pairs of aligned sequences;
- *             otherwise    randomize each input sequence <-n> times by
- *                          the selected strategy.
- *           Output goes to stdout, or to the -o file if one was given.
- *
- * Return:   0 on success. All errors terminate through Die().
- */
-int
-main(int argc, char **argv)
-{
-  char  *seqfile;               /* name of sequence file */
-  SQFILE *dbfp;			/* open sequence file */
-  int    fmt;			/* format of seqfile  */
-  char  *seq;			/* sequence */
-  char   sqname[32];		/* name of an iid sequence */
-  SQINFO sqinfo;                /* additional sequence info */
-  char  *shuff;                 /* shuffled sequence */
-  int    num;			/* number to generate */
-  int    seed;			/* random number generator seed */
-  int    i;
-  int    w;			/* window size for regional shuffle (or 0) */
-  int    truncation;		/* fixed length for truncation option (or 0) */
-  int    no_desc;		/* TRUE to remove description lines */
-  enum   {			/* shuffling strategy */
-    DO_SHUFFLE, DO_DPSHUFFLE, DO_MARKOV0, DO_MARKOV1, DO_REVERSE, DO_REGIONAL,
-    DO_IID_SAMELEN, DO_IID_FIXEDLEN} strategy;
-  int    do_dna;		/* TRUE to make DNA iid seqs, not protein */
-  int    do_alignment;		/* TRUE to shuffle alignment columns */
-  int    do_qrna;		/* TRUE for pairwise alignment shuffling mode */
-  char  *outfile;               /* name of save file (default NULL) */
-  FILE  *ofp;                   /* open output file (default stdout) */
-
-  char  *optname;               /* option name */
-  char  *optarg;                /* option argument (or NULL) */
-  int    optind;                /* index of next argv[] */
-
-
-  /***********************************************
-   * Parse command line
-   ***********************************************/
-
-  fmt          = SQFILE_UNKNOWN;	/* autodetect file format by default */
-  num          = 0;			/* 0 = "not set"; defaulted to 1 later */
-  seed         = (int) time ((time_t *) NULL);
-  w            = 0;
-  truncation   = 0;
-  strategy     = DO_SHUFFLE;
-  no_desc      = FALSE;
-  do_dna       = FALSE;
-  do_alignment = FALSE;
-  do_qrna      = FALSE;
-  outfile      = NULL;
-  ofp          = stdout;
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-		&optind, &optname, &optarg))
-    {
-      /* The strategy options overwrite each other: the last one given wins. */
-      if      (strcmp(optname, "-0") == 0) strategy = DO_MARKOV0;
-      else if (strcmp(optname, "-1") == 0) strategy = DO_MARKOV1;
-      else if (strcmp(optname, "-d") == 0) strategy = DO_DPSHUFFLE;
-      else if (strcmp(optname, "-n") == 0) num      = atoi(optarg);
-      else if (strcmp(optname, "-o") == 0) outfile  = optarg;
-      else if (strcmp(optname, "-w") == 0) {strategy = DO_REGIONAL; w = atoi(optarg); }
-      else if (strcmp(optname, "-i") == 0) strategy = DO_IID_FIXEDLEN;
-      else if (strcmp(optname, "-l") == 0) strategy = DO_IID_SAMELEN;
-      else if (strcmp(optname, "-r") == 0) strategy = DO_REVERSE;
-      else if (strcmp(optname, "-t") == 0) truncation = atoi(optarg);
-
-      else if (strcmp(optname, "--alignment")== 0) do_alignment = TRUE;
-      else if (strcmp(optname, "--amino")    == 0) do_dna       = FALSE;
-      else if (strcmp(optname, "--dna")      == 0) do_dna       = TRUE;
-      else if (strcmp(optname, "--nodesc")   == 0) no_desc      = TRUE;
-      else if (strcmp(optname, "--qrna")     == 0) do_qrna      = TRUE;
-      else if (strcmp(optname, "--seed")     == 0) seed         = atoi(optarg);
-      else if (strcmp(optname, "--informat") == 0) {
-	fmt = String2SeqfileFormat(optarg);
-	if (fmt == SQFILE_UNKNOWN)
-	  Die("unrecognized sequence file format \"%s\"", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0) {
-	SqdBanner(stdout, banner);
-	puts(usage);
-	puts(experts);
-        exit(EXIT_SUCCESS);
-      }
-    }
-
-  if (outfile != NULL) {
-    if ((ofp = fopen(outfile,"w")) == NULL)
-      Die("Failed to open output file %s", outfile);
-  }
-
-  /*****************************************************************
-   * Special case, 1: IID sequence generation.
-   * -i option is special, because it synthesizes, rather than
-   * shuffles. Doesn't take a seqfile argument;
-   * requires -n, -t; and doesn't use the same code logic as the
-   * other shuffling strategies. Note that we misuse/overload the
-   * -t "truncation length" option to set our fixed length for
-   * generating iid sequence.
-   *****************************************************************/
-
-  if (strategy == DO_IID_FIXEDLEN) {
-    if (num == 0 || truncation == 0)
-      Die("-i (i.i.d. sequence generation) requires -n,-t to be set\n%s\n",
-	  usage);
-    if (argc-optind != 0)
-      Die("-i (i.i.d. sequence generation) takes no seqfile argument\n%s\n",
-	  usage);
-    sre_srandom(seed);
-    for (i = 0; i < num; i++)
-      {
-	if (do_dna)
-	  shuff = RandomSequence(DNA_ALPHABET, dnafq, 4, truncation);
-	else
-	  shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, truncation);
-
-	/* pedantic note: sqname has room for 31 char + \0, so
-	 * there's room for 24 digits - a 32-bit integer can only run up
-	 * to 10 digits, and a 64-bit integer to 20, so we don't worry
-	 * about the following sprintf() overrunning its bounds.
-	 */
-	sprintf(sqname, "randseq%d", i);
-	WriteSimpleFASTA(ofp, shuff, sqname, NULL);
-	free(shuff);
-      }
-    /* close the -o file here too, as every other exit path does */
-    if (outfile != NULL) fclose(ofp);
-    return 0;
-  }
-
-  /*****************************************************************
-   * Check command line
-   *****************************************************************/
-
-  if (argc - optind != 1)
-    Die("Incorrect number of command line arguments\n%s\n", usage);
-  seqfile = argv[optind];
-  if (num == 0) num = 1;	/* set default shuffle number per sequence */
-  sre_srandom(seed);
-
-  /* Try to work around inability to autodetect from a pipe or .gz:
-   * assume FASTA format
-   */
-  if (fmt == SQFILE_UNKNOWN &&
-      (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
-    fmt = SQFILE_FASTA;
-
-  /*****************************************************************
-   * Special case, 2: Multiple alignment shuffling
-   *****************************************************************/
-  if (do_alignment)
-    {
-      shuffle_alignment_file(ofp, seqfile, fmt);
-      if (outfile != NULL) fclose(ofp);
-      return 0;
-    }
-
-  /*****************************************************************
-   * Main logic of the shuffling program:
-   * expect one seqfile argument
-   *****************************************************************/
-
-  if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
-    Die("Failed to open sequence file %s for reading", seqfile);
-  if (do_qrna && dbfp->format != SQFILE_FASTA)
-    Die("--qrna option requires that %s is in QRNA/FASTA format", seqfile);
-
-  while (ReadSeq(dbfp, dbfp->format, &seq, &sqinfo))
-    {
-      /* Another special case: QRNA mode.
-       * Sequences are consumed two at a time and shuffled together in
-       * place; the strategy, -n and -t settings are not consulted here.
-       */
-      if (do_qrna)
-	{
-	  char  *seq2;
-	  SQINFO sqinfo2;
-
-	  if (! ReadSeq(dbfp, dbfp->format, &seq2, &sqinfo2))
-	    Die("Failed to read an aligned partner for sequence %s", sqinfo.name);
-	  if (strlen(seq) != strlen(seq2))
-	    Die("Length of %s is not the same as %s\n", sqinfo.name, sqinfo2.name);
-
-	  QRNAShuffle(seq, seq2, seq, seq2);
-
-	  WriteSeq(ofp, SQFILE_FASTA, seq,  &sqinfo);
-	  WriteSeq(ofp, SQFILE_FASTA, seq2, &sqinfo2);
-
-	  FreeSequence(seq,  &sqinfo);
-	  FreeSequence(seq2, &sqinfo2);
-	  continue;
-	}
-
-      /* back to the main logic...
-       */
-      shuff = (char *) MallocOrDie ((sqinfo.len + 1) * sizeof(char));
-      if (no_desc) strcpy(sqinfo.desc, "");
-
-      /* If we're truncating seq, do it now.
-       * Inputs shorter than the requested length are skipped entirely;
-       * otherwise a randomly placed window of that length replaces seq.
-       */
-      if (truncation > 0)
-	{
-	  int start;
-	  if (sqinfo.len < truncation) {
-	    free(shuff);
-	    FreeSequence(seq, &sqinfo);
-	    continue;
-	  }
-
-	  start = CHOOSE(sqinfo.len - truncation + 1);
-	  strncpy(shuff, seq+start, truncation);
-	  shuff[truncation] = '\0';	/* strncpy() does not terminate here */
-	  strcpy(seq, shuff);
-	  sqinfo.len = truncation;
-	}
-
-      for (i = 0; i < num; i++)
-	{
-	  switch (strategy) {
-	  case DO_SHUFFLE:     StrShuffle(shuff, seq);            break;
-	  case DO_DPSHUFFLE:   StrDPShuffle(shuff, seq);          break;
-	  case DO_MARKOV0:     StrMarkov0(shuff, seq);            break;
-	  case DO_MARKOV1:     StrMarkov1(shuff, seq);            break;
-	  case DO_REVERSE:     StrReverse(shuff, seq);            break;
-	  case DO_REGIONAL:    StrRegionalShuffle(shuff, seq, w); break;
-	  case DO_IID_SAMELEN:
-	    /* RandomSequence() hands back a fresh buffer, so drop ours.
-	     * Honor --dna/--amino here exactly as the -i path does; the
-	     * help text advertises both options for -l as well.
-	     */
-	    free(shuff);
-	    if (do_dna)
-	      shuff = RandomSequence(DNA_ALPHABET, dnafq, 4, sqinfo.len);
-	    else
-	      shuff = RandomSequence(AMINO_ALPHABET, aafq, 20, sqinfo.len);
-	    break;
-	  default: Die("choked on a bad enum; tragic.");
-	  }
-
-	  WriteSeq(ofp, SQFILE_FASTA, shuff, &sqinfo);
-	}
-
-      free(shuff);
-      FreeSequence(seq, &sqinfo);
-    }
-
-  SeqfileClose(dbfp);
-  if (outfile != NULL) fclose(ofp);
-  return 0;
-}
-
-
-/* Function: shuffle_alignment_file()
- *
- * Purpose:  Read every alignment from <afile> (opened with format
- *           code <fmt>), run AlignmentShuffle() on it in place, and
- *           write it to <ofp> in the format the file was read in.
- *
- * Return:   (void). Dies if <afile> cannot be opened.
- */
-static void
-shuffle_alignment_file(FILE *ofp, char *afile, int fmt)
-{
-  MSAFILE *msafp = MSAFileOpen(afile, fmt, NULL);
-  MSA     *ali;
-
-  if (msafp == NULL)
-    Die("Alignment file %s could not be opened for reading", afile);
-
-  for (;;)
-    {
-      ali = MSAFileRead(msafp);
-      if (ali == NULL) break;	/* no more alignments */
-
-      /* source and destination are the same array: shuffle in place */
-      AlignmentShuffle(ali->aseq, ali->aseq, ali->nseq, ali->alen);
-      /* output format follows the input file's format */
-      MSAFileWrite(ofp, ali, msafp->format, FALSE);
-      MSAFree(ali);
-    }
-
-  MSAFileClose(msafp);
-}
diff --git a/squid/sindex_main.c b/squid/sindex_main.c
deleted file mode 100644
index ff060bb..0000000
--- a/squid/sindex_main.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sindex_main.c, SRE, Fri Feb 16 08:38:39 2001 [St. Louis]
- *
- * sindex -- create SSI index of sequence file(s) for sfetch
- *
- * CVS $Id: sindex_main.c,v 1.8 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-#include "ssi.h"
-
-/* One-line program description; handed to SqdBanner() when -h is given. */
-static char banner[] = "sindex - create SSI index of sequence file(s) for sfetch";
-
-/* Help text for the everyday options. Besides being printed by -h, it is
- * passed to Getopt() and appended to the argument-count error message.
- */
-static char usage[] = "\
-Usage: sindex [-options] <seqfile>...\n\
- Available options:\n\
- -h : help; print version and usage info.\n\
- -o <f> : output the SSI index to file named <f>\n\
-";
-
-/* Help text for the long ("expert") options; printed only by -h. */
-static char experts[] = "\
- --64 : force index mode to 64-bit, even on small files\n\
- --external : force index compilation to use external (on-disk) sorting\n\
- --informat <s> : specify input sequence file format <s>\n\
- --pfamseq : index a FASTA file with >(name) (accession) (desc)\n\
-";
-
-/* Table of accepted options, handed to Getopt(). Each entry is
- * { name, flag, argument type }. The flag is TRUE for the single-dash
- * options and FALSE for the double-dash ones; presumably it marks
- * single-character options - confirm against struct opt_s.
- *
- * File-local (static), like the corresponding table in shuffle_main.c:
- * nothing outside this file needs the symbol.
- */
-static struct opt_s OPTIONS[] = {
-  { "-h",         TRUE,  sqdARG_NONE   },	/* help                      */
-  { "-o",         TRUE,  sqdARG_STRING },	/* name of SSI output file   */
-  { "--64",       FALSE, sqdARG_NONE   },	/* force 64-bit index mode   */
-  { "--external", FALSE, sqdARG_NONE   },	/* force external sorting    */
-  { "--informat", FALSE, sqdARG_STRING },	/* specify input file format */
-  { "--pfamseq",  FALSE, sqdARG_NONE   },	/* index FASTA accessions    */
-};
-/* Number of entries in OPTIONS[]. */
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-/* Function: main()
- *
- * Purpose:  Entry point of sindex. Parses the command line, chooses
- *           32-bit or 64-bit offset mode for the whole index, adds
- *           every sequence of every <seqfile> argument to one SSI
- *           index (primary key = sequence name; secondary keys = id,
- *           accession and, with --pfamseq, the first word of a FASTA
- *           description), then writes the index and prints a summary.
- *
- * Return:   0 on success. All errors terminate through Die().
- */
-int
-main(int argc, char **argv)
-{
-  char     *file;			/* sequence file currently handled      */
-  SQFILE   *sqfp;			/* that file, opened for indexing       */
-  int       format      = SQFILE_UNKNOWN; /* forced input format; default: autodetect */
-  int       mode        = SSI_OFFSET_I32; /* offset mode; default: 32-bit        */
-  int       argi;			/* walks argv[] over the file arguments */
-  int       rc;				/* return code of the latest SSI call   */
-  SSIINDEX *ssi;			/* the index under construction         */
-  char     *ssifile     = NULL;		/* index file name; default: <file>.ssi */
-  int       fhandle;			/* SSI handle of the current file       */
-  char     *seq;			/* a sequence read from the file        */
-  SQINFO    sqinfo;			/* info on that sequence                */
-
-  int       do_pfamseq  = FALSE;	/* TRUE to index FASTA accessions too   */
-  int       do_external = FALSE;	/* TRUE to force external sorting       */
-  char     *optname;
-  char     *optarg;
-  int       optind;
-
-  /***********************************************
-   * Parse the command line
-   ***********************************************/
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-		&optind, &optname, &optarg))
-    {
-      if (strcmp(optname, "-h") == 0)
-	{
-	  SqdBanner(stdout, banner);
-	  puts(usage);
-	  puts(experts);
-	  exit(EXIT_SUCCESS);
-	}
-      else if (strcmp(optname, "--informat") == 0)
-	{
-	  format = String2SeqfileFormat(optarg);
-	  if (format == SQFILE_UNKNOWN)
-	    Die("unrecognized input sequence file format \"%s\"", optarg);
-	}
-      else if (strcmp(optname, "--pfamseq")  == 0) do_pfamseq  = TRUE;
-      else if (strcmp(optname, "--external") == 0) do_external = TRUE;
-      else if (strcmp(optname, "--64")       == 0) mode        = SSI_OFFSET_I64;
-      else if (strcmp(optname, "-o")         == 0) ssifile     = sre_strdup(optarg, -1);
-    }
-
-  if (argc - optind < 1)
-    Die("Incorrect number of command line arguments.\n%s\n", usage);
-
-
-  /*****************************************************************
-   * Get set up...
-   *****************************************************************/
-
-  /* One file that needs 64-bit offsets switches the whole index to
-   * 64-bit mode; nothing ever switches it back to 32-bit.
-   */
-  for (argi = optind; argi < argc; argi++)
-    {
-      file = argv[argi];
-      rc   = SSIRecommendMode(file);
-      if (rc == -1)
-	Die("Couldn't stat %s - file doesn't exist, or is too big", file);
-      if (rc == SSI_OFFSET_I64)
-	mode = SSI_OFFSET_I64;
-    }
-
-  /* Without -o, name the index after <file>, which at this point is
-   * the last file argument visited by the loop above.
-   */
-  if (ssifile == NULL)
-    {
-      ssifile = sre_strdup(file, -1);
-      sre_strcat(&ssifile, -1, ".ssi", -1);
-    }
-
-  ssi = SSICreateIndex(mode);
-  if (ssi == NULL)
-    Die("Couldn't allocate/initialize the new SSI index\n");
-
-  if (do_external)
-    SSIForceExternalSort(ssi);
-
-  /*****************************************************************
-   * Go through the files one at a time and compile index.
-   *****************************************************************/
-
-  for (argi = optind; argi < argc; argi++)
-    {
-      file = argv[argi];
-      printf("Working on file %s... \t", file);
-      fflush(stdout);
-
-      sqfp = SeqfileOpenForIndexing(file, format, NULL, mode);
-      if (sqfp == NULL)
-	Die("Failed to open sequence file %s for reading", file);
-
-      rc = SSIAddFileToIndex(ssi, file, sqfp->format, &fhandle);
-      if (rc != 0)
-	Die("SSI error: %s\n", SSIErrorString(rc));
-
-      while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo))
-	{
-	  /* primary key: the sequence name */
-	  rc = SSIAddPrimaryKeyToIndex(ssi, sqinfo.name, fhandle,
-				       &(sqfp->r_off), &(sqfp->d_off),
-				       sqinfo.len);
-	  if (rc != 0)
-	    Die("SSI error: %s\n", SSIErrorString(rc));
-
-#if DEBUGLEVEL >= 2
-	  if (mode == SSI_OFFSET_I32)
-	    SQD_DPRINTF2(("Added primary key %s: r_off=%lu, d_off=%lu len=%d\n",
-			  sqinfo.name, sqfp->r_off.off.i32,
-			  sqfp->d_off.off.i32, sqinfo.len));
-	  else
-	    SQD_DPRINTF2(("Added primary key %s: r_off=%llu, d_off=%llu len=%d\n",
-			  sqinfo.name, sqfp->r_off.off.i64, sqfp->d_off.off.i64,
-			  sqinfo.len));
-#endif
-
-	  /* secondary keys: id and accession, when the reader set them */
-	  if (sqinfo.flags & SQINFO_ID)
-	    {
-	      rc = SSIAddSecondaryKeyToIndex(ssi, sqinfo.id, sqinfo.name);
-	      if (rc != 0)
-		Die("SSI error: %s\n", SSIErrorString(rc));
-	    }
-
-	  if (sqinfo.flags & SQINFO_ACC)
-	    {
-	      rc = SSIAddSecondaryKeyToIndex(ssi, sqinfo.acc, sqinfo.name);
-	      if (rc != 0)
-		Die("SSI error: %s\n", SSIErrorString(rc));
-	    }
-
-	  /* --pfamseq: the first whitespace-delimited word of a FASTA
-	   * description is indexed as one more secondary key.
-	   */
-	  if (do_pfamseq && sqfp->format == SQFILE_FASTA && sqinfo.desc != NULL)
-	    {
-	      char *cursor = sqinfo.desc;
-	      char *word   = sre_strtok(&cursor, " \t", NULL);
-
-	      if (word != NULL)
-		{
-		  rc = SSIAddSecondaryKeyToIndex(ssi, word, sqinfo.name);
-		  if (rc != 0)
-		    Die("SSI error: %s\n", SSIErrorString(rc));
-		}
-	    }
-
-	  FreeSequence(seq, &sqinfo);
-	}
-
-      /* Subsequence retrieval is registered only when both bpl and
-       * rpl came back positive for this file.
-       */
-      if (sqfp->bpl > 0 && sqfp->rpl > 0)
-	{
-	  rc = SSISetFileForSubseq(ssi, fhandle, sqfp->bpl, sqfp->rpl);
-	  if (rc != 0)
-	    Die("SSI error: %s\n", SSIErrorString(rc));
-	  printf("FAST_SUBSEQ set...\t");
-	}
-      else
-	{
-	  printf(" \t");
-	}
-
-      SeqfileClose(sqfp);
-      printf("[done]\n");
-    }
-
-  printf("Sorting and writing index to SSI file %s...\t", ssifile);
-  fflush(stdout);
-  rc = SSIWriteIndex(ssifile, ssi);
-  if (rc != 0)
-    Die("SSIWriteIndex() failed: %s", SSIErrorString(rc));
-  printf("[done]\n");
-
-  /* summary */
-  printf("%s:\n", ssifile);
-  printf("Mode: %s\n",
-	 mode == SSI_OFFSET_I32 ? "32-bit" : "64-bit");
-  printf("Files: %d\n", ssi->nfiles);
-  printf("Primary keys: %d\n", ssi->nprimary);
-  printf("Secondary keys: %d\n", ssi->nsecondary);
-
-  SSIFreeIndex(ssi);
-
-  free(ssifile);
-  return 0;
-}
diff --git a/squid/sqerror.c b/squid/sqerror.c
deleted file mode 100644
index 2187661..0000000
--- a/squid/sqerror.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sqerror.c
- *
- * error handling for the squid library
- * CVS $Id: sqerror.c,v 1.6 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-#include "squid.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-
-int squid_errno; /* a global errno equivalent */
-
-
-/* Function: Die()
- *
- * Purpose:  Report a fatal error on stderr and terminate. The message
- *           is "\nFATAL: " followed by the printf()-style expansion
- *           of <format> and its arguments, then a newline.
- *
- * Return:   Does not return; the process exits with status 1.
- */
-/* VARARGS0 */
-void
-Die(char *format, ...)
-{
-  va_list ap;
-
-  fputs("\nFATAL: ", stderr);
-
-  va_start(ap, format);
-  vfprintf(stderr, format, ap);
-  va_end(ap);
-
-  fputc('\n', stderr);
-  fflush(stderr);
-
-  exit(1);
-}
-
-
-
-/* Function: Warn()
- *
- * Purpose:  Report a non-fatal problem on stderr and carry on. The
- *           message is "WARNING: " followed by the printf()-style
- *           expansion of <format> and its arguments, then a newline.
- *
- * Return:   (void)
- */
-/* VARARGS0 */
-void
-Warn(char *format, ...)
-{
-  va_list ap;
-
-  fputs("WARNING: ", stderr);
-
-  va_start(ap, format);
-  vfprintf(stderr, format, ap);
-  va_end(ap);
-
-  fputc('\n', stderr);
-  fflush(stderr);
-}
-
-/* Function: Panic()
- *
- * Purpose:  Bail out after a StdC/POSIX call that should never fail
- *           has failed anyway. Prints "\nPANIC [<file> line <line>] "
- *           on stderr, lets perror() append the description of the
- *           current errno, and exits with EXIT_FAILURE.
- *
- *           Normally reached through the PANIC macro, which supplies
- *           __FILE__ and __LINE__; see structs.h.
- *
- *           Inspired by code in Donald Lewine's book, _POSIX
- *           Programmer's Guide_.
- *
- * Return:   Does not return.
- */
-void
-Panic(char *file, int line)
-{
-  fprintf(stderr, "\nPANIC [%s line %d] ", file, line);
-  perror("Unusual error");
-  exit(EXIT_FAILURE);
-}
-
-
diff --git a/squid/sqfuncs.h b/squid/sqfuncs.h
deleted file mode 100644
index fcd77e8..0000000
--- a/squid/sqfuncs.h
+++ /dev/null
@@ -1,272 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQFUNCSH_INCLUDED
-#define SQFUNCSH_INCLUDED
-/* sqfuncs.h
- *
- * Prototypes for squid library functions;
- * also makes a good reference list for what the package contains.
- *
- * Warning: squid is a slowly evolving beast. Some functions are
- * obsolete. Some functions are probably just wrong, dating to
- * a primordial era before I knew anything about what I was doing.
- * Some functions are both obsolete and wrong but still necessary
- * to get legacy code to compile.
- *
- * CVS $Id: sqfuncs.h,v 1.30 2003/05/26 16:21:50 eddy Exp $
- */
-
-/*
- * from aligneval.c
- */
-extern float ComparePairAlignments(char *known1, char *known2, char *calc1, char *calc2);
-extern float CompareRefPairAlignments(int *ref, char *known1, char *known2, char *calc1, char *calc2);
-extern float CompareMultAlignments(char **kseqs, char **tseqs, int N);
-extern float CompareRefMultAlignments(int *ref, char **kseqs, char **tseqs, int N);
-extern float PairwiseIdentity(char *s1, char *s2);
-extern float AlignmentIdentityBySampling(char **aseq, int L, int N, int nsample);
-extern char *MajorityRuleConsensus(char **aseq, int nseq, int alen);
-
-/*
- * from alignio.c
- */
-extern void AllocAlignment(int nseq, int alen, char ***ret_aseq, AINFO *ainfo);
-extern void InitAinfo(AINFO *ainfo);
-extern void FreeAlignment(char **aseqs, AINFO *ainfo);
-extern void SAMizeAlignment(char **aseq, int nseq, int alen);
-extern void SAMizeAlignmentByGapFrac(char **aseq, int nseq, int alen, float maxgap);
-extern int MakeAlignedString(char *aseq, int alen, char *ss, char **ret_s);
-extern int MakeDealignedString(char *aseq, int alen, char *ss, char **ret_s);
-extern int DealignedLength(char *aseq);
-extern int WritePairwiseAlignment(FILE *ofp, char *aseq1, char *name1, int spos1,
- char *aseq2, char *name2, int spos2,
- int **pam, int indent);
-extern int MingapAlignment(char **aseqs, AINFO *ainfo);
-extern int RandomAlignment(char **rseqs, SQINFO *sqinfo, int nseq, float pop, float pex,
- char ***ret_aseqs, AINFO *ainfo);
-extern void AlignmentHomogenousGapsym(char **aseq, int nseq, int alen, char gapsym);
-
-/* from cluster.c
- */
-extern int Cluster(float **mx, int N, enum clust_strategy mode, struct phylo_s **ret_tree);
-extern struct phylo_s *AllocPhylo(int N);
-extern void FreePhylo(struct phylo_s *tree, int N);
-extern void MakeDiffMx(char **aseqs, int num, float ***ret_dmx);
-extern void MakeIdentityMx(char **aseqs, int num, float ***ret_imx);
-extern void PrintNewHampshireTree(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N);
-extern void PrintPhylo(FILE *fp, AINFO *ainfo, struct phylo_s *tree, int N);
-
-/*
- * from dayhoff.c
- */
-extern int ParsePAMFile(FILE *fp, int ***ret_pam, float *ret_scale);
-extern void ScalePAM(int **pam, int scale);
-
-
-/* from file.c
- */
-extern char *FileDirname(char *filename);
-extern char *FileTail(char *file, int noextension);
-extern char *FileSameDirectory(char *full, char *file);
-extern char *FileConcat(char *dir, char *file);
-extern char *FileAddSuffix(char *filename, char *sfx);
-extern FILE *EnvFileOpen(char *fname, char *env, char **ret_dir);
-extern int FileExists(char *filename);
-
-
-/* from getopt.c
- */
-extern int Getopt(int argc, char **argv,
- struct opt_s *opt, int nopts, char *usage,
- int *ret_optind, char **ret_optname, char **ret_optarg);
-
-
-/* from hsregex.c
- * Henry Spencer's regex() code
- */
-extern int Strparse(char *rexp, char *s, int ntok);
-extern void SqdClean(void);
-extern sqd_regexp *sqd_regcomp(const char *re);
-extern int sqd_regexec(sqd_regexp *rp, const char *s);
-extern void sqd_regsub(const sqd_regexp *rp, const char *src, char *dst);
-extern void sqd_regerror(char *message);
-
-/* from interleaved.c
- */
-extern int IsInterleavedFormat(int format);
-extern int ReadInterleaved(char *seqfile,
- int (*skip_header)(FILE *),
- int (*parse_header)(FILE *, AINFO *),
- int (*is_dataline)(char *, char *),
- char ***ret_aseqs, AINFO *ainfo);
-extern int ReadAlignment(char *seqfile, int format, char ***ret_aseqs, AINFO *ainfo);
-
-
-/* from revcomp.c
- */
-extern char *revcomp(char *comp, char *seq);
-
-/*
- * from selex.c
- */
-extern int DealignAseqs(char **aseqs, int num, char ***ret_rseqs);
-extern int IsSELEXFormat(char *filename);
-extern int TruncateNames(char **names, int N); /* OBSOLETE? */
-
-/*
- * from seqencode.c
- */
-extern int seqcmp(char *s1, char *s2, int allow);
-extern int seqncmp(char *s1, char *s2, int n, int allow);
-extern int seqencode(char *codeseq,char *str);
-extern int coded_revcomp(char *comp, char *seq);
-extern int seqdecode(char *str, char *codeseq);
-extern int seqndecode(char *str, char *codeseq, int n);
-
-/*
- * from shuffle.c
- */
-extern int StrShuffle(char *s1, char *s2);
-extern int StrDPShuffle(char *s1, char *s2);
-extern int StrMarkov0(char *s1, char *s2);
-extern int StrMarkov1(char *s1, char *s2);
-extern int StrReverse(char *s1, char *s2);
-extern int StrRegionalShuffle(char *s1, char *s2, int w);
-extern int AlignmentShuffle(char **ali1, char **ali2, int nseq, int alen);
-extern int AlignmentBootstrap(char **ali1, char **ali2, int nseq, int alen);
-extern int QRNAShuffle(char *xs, char *ys, char *x, char *y);
-
-/*
- * from sqerror.c
- */
-extern void Die(char *format, ...);
-extern void Warn(char *format, ...);
-extern void Panic(char *file, int line);
-
-
-/*
- * from sqio.c
- */
-extern void FreeSequence(char *seq, SQINFO *sqinfo);
-extern int SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag);
-extern void SeqinfoCopy(SQINFO *sq1, SQINFO *sq2);
-extern void ToDNA(char *seq);
-extern void ToRNA(char *seq);
-extern void ToIUPAC(char *seq, int is_aseq);
-extern int ReadMultipleRseqs(char *seqfile, int fformat, char ***ret_rseqs,
- SQINFO **ret_sqinfo, int *ret_num);
-extern SQFILE *SeqfileOpen(char *filename, int format, char *env);
-extern SQFILE *SeqfileOpenForIndexing(char *filename, int format, char *env, int ssimode);
-extern int SeqfileFormat(FILE *fp);
-extern void SeqfilePosition(SQFILE *sfp, SSIOFFSET *offset);
-extern void SeqfileRewind(SQFILE *sfp);
-extern void SeqfileClose(SQFILE *sfp);
-
-extern int ReadSeq(SQFILE *fp, int format, char **ret_seq, SQINFO *sqinfo);
-extern int GCGBinaryToSequence(char *seq, int len);
-extern int GCGchecksum(char *seq, int seqlen);
-extern int GCGMultchecksum(char **seqs, int nseq);
-extern void WriteSimpleFASTA(FILE *fp, char *seq, char *name, char *desc);
-extern int WriteSeq(FILE *outf, int outfmt, char *seq, SQINFO *sqinfo);
-extern int Seqtype(char *seq);
-extern int GuessAlignmentSeqtype(char **aseq, int nseq);
-extern int String2SeqfileFormat(char *s);
-extern char *SeqfileFormat2String(int code);
-extern SQINFO *MSAToSqinfo(MSA *msa);
-
-/* from squidcore.c
- */
-extern void SqdBanner(FILE *fp, char *banner);
-
-
-/* from sre_ctype.c
- */
-extern int sre_tolower(int c);
-extern int sre_toupper(int c);
-
-/* from sre_math.c
- */
-extern int Linefit(float *x, float *y, int N,
- float *ret_a, float *ret_b, float *ret_r);
-extern void WeightedLinefit(float *x, float *y, float *var, int N,
- float *ret_m, float *ret_b);
-extern double Gammln(double xx);
-extern float **FMX2Alloc(int rows, int cols);
-extern void FMX2Free(float **mx);
-extern double **DMX2Alloc(int rows, int cols);
-extern void DMX2Free(double **mx);
-extern void FMX2Multiply(float **A, float **B, float **C, int m, int p, int n);
-extern double IncompleteGamma(double a, double x);
-
-/* from sre_string.c
- */
-#ifdef NOSTR
-extern char *strstr(char *s, char *subs);
-#endif
-extern char *Strdup(char *s);
-extern void StringChop(char *s);
-extern int Strinsert(char *s1, char c, int pos);
-extern int Strdelete(char *s1, int pos);
-extern void s2lower(char *s);
-extern void s2upper(char *s);
-extern void *sre_malloc(char *file, int line, size_t size);
-extern void *sre_realloc(char *file, int line, void *p, size_t size);
-extern void Free2DArray(void **p, int dim1);
-extern void Free3DArray(void ***p, int dim1, int dim2);
-extern char *RandomSequence(char *alphabet, float *p, int n, int len);
-extern char *sre_fgets(char **buf, int *n, FILE *fp);
-extern int sre_strcat(char **dest, int ldest, char *src, int lsrc);
-extern char *sre_strtok(char **s, char *delim, int *len);
-extern char *sre_strdup(char *s, int n);
-extern char *sre_strncat(char *s1, char *s2, int n);
-extern char *sre_strncpy(char *s1, char *s2, int n);
-extern int IsBlankline(char *s);
-
-/* from stack.c
- */
-extern struct intstack_s *InitIntStack(void);
-extern void PushIntStack(struct intstack_s *stack, int data);
-extern int PopIntStack(struct intstack_s *stack, int *ret_data);
-extern void ReverseIntStack(struct intstack_s *stack);
-extern int FreeIntStack( struct intstack_s *stack );
-
-/*
- * from translate.c
- */
-extern char *Translate(char *seq, char **code);
-
-/*
- * from types.c
- */
-extern int IsInt(char *s);
-extern int IsReal(char *s);
-extern void Byteswap(char *swap, int nbytes);
-#ifndef USE_HOST_BYTESWAP_FUNCTIONS
-extern sqd_uint16 sre_ntoh16(sqd_uint16 netshort);
-extern sqd_uint32 sre_ntoh32(sqd_uint32 netlong);
-extern sqd_uint16 sre_hton16(sqd_uint16 hostshort);
-extern sqd_uint32 sre_hton32(sqd_uint32 hostlong);
-#endif /*!USE_HOST_BYTESWAP_FUNCTIONS*/
-extern sqd_uint64 sre_ntoh64(sqd_uint64 net_int64);
-extern sqd_uint64 sre_hton64(sqd_uint64 host_int64);
-
-/*
- * from weight.c
- */
-extern void GSCWeights(char **aseq, int nseq, int alen, float *wgt);
-extern void VoronoiWeights(char **aseq, int nseq, int alen, float *wgt);
-extern void BlosumWeights(char **aseq, int nseq, int alen, float blosumlevel, float *wgt);
-extern void PositionBasedWeights(char **aseq, int nseq, int alen, float *wgt);
-extern void FilterAlignment(MSA *msa, float cutoff, MSA **ret_new);
-extern void SampleAlignment(MSA *msa, int sample, MSA **ret_new);
-extern void SingleLinkCluster(char **aseq, int nseq, int alen, float maxid,
- int **ret_c, int *ret_nc);
-#endif /* SQFUNCSH_INCLUDED */
diff --git a/squid/sqio.c b/squid/sqio.c
deleted file mode 100644
index ca47cb4..0000000
--- a/squid/sqio.c
+++ /dev/null
@@ -1,1933 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* File: sqio.c
- * From: ureadseq.c in Don Gilbert's sequence i/o package
- *
- * Reads and writes nucleic/protein sequence in various
- * formats. Data files may have multiple sequences.
- *
- * Heavily modified from READSEQ package
- * Copyright (C) 1990 by D.G. Gilbert
- * Biology Dept., Indiana University, Bloomington, IN 47405
- * email: gilbertd at bio.indiana.edu
- * Thanks Don!
- *
- * SRE: Modifications as noted. Fri Jul 3 09:44:54 1992
- * Packaged for squid, Thu Oct 1 10:07:11 1992
- * ANSI conversion in full swing, Mon Jul 12 12:22:21 1993
- *
- * CVS $Id: sqio.c,v 1.32 2003/10/03 18:26:37 eddy Exp $
- *
- *****************************************************************
- * Basic API for single sequence reading:
- *
- * SQFILE *sqfp;
- * char *seqfile;
- * int format; - see squid.h for formats; example: SQFILE_FASTA
- * char *seq;
- * SQINFO sqinfo;
- *
- * if ((sqfp = SeqfileOpen(seqfile, format, "BLASTDB")) == NULL)
- * Die("Failed to open sequence database file %s\n%s\n", seqfile, usage);
- * while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) {
- * do_stuff;
- * FreeSequence(seq, &sqinfo);
- * }
- * SeqfileClose(sqfp);
- *
- *****************************************************************
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#ifndef SEEK_SET
-#include <unistd.h>
-#endif
-
-#include "squid.h"
-#include "msa.h"
-#include "ssi.h"
-
-static void SeqfileGetLine(SQFILE *V);
-
-#define kStartLength 500
-
-static char *aminos = "ABCDEFGHIKLMNPQRSTVWXYZ*";
-static char *primenuc = "ACGTUN";
-static char *protonly = "EFIPQZ";
-
-static SQFILE *seqfile_open(char *filename, int format, char *env, int ssimode);
-
-/* Function: SeqfileOpen()
- *
- * Purpose : Open a sequence database file and prepare for reading
- * sequentially.
- *
- * Args: filename - name of file to open
- * format - format of file
- * env - environment variable for path (e.g. BLASTDB)
- * ssimode - -1, SSI_OFFSET_I32, or SSI_OFFSET_I64
- *
- * Returns opened SQFILE ptr, or NULL on failure.
- */
-SQFILE *
-SeqfileOpen(char *filename, int format, char *env)
-{
- return seqfile_open(filename, format, env, -1);
-}
-SQFILE *
-SeqfileOpenForIndexing(char *filename, int format, char *env, int ssimode)
-{
- return seqfile_open(filename, format, env, ssimode);
-}
-static SQFILE *
-seqfile_open(char *filename, int format, char *env, int ssimode)
-{
- SQFILE *dbfp;
-
- dbfp = (SQFILE *) MallocOrDie (sizeof(SQFILE));
-
- dbfp->ssimode = ssimode;
- dbfp->rpl = -1; /* flag meaning "unset" */
- dbfp->lastrpl = 0;
- dbfp->maxrpl = 0;
- dbfp->bpl = -1; /* flag meaning "unset" */
- dbfp->lastbpl = 0;
- dbfp->maxbpl = 0;
-
- /* Open our file handle.
- * Three possibilities:
- * 1. normal file open
- * 2. filename = "-"; read from stdin
- * 3. filename = "*.gz"; read thru pipe from gzip
- * If we're reading from stdin or a pipe, we can't reliably
- * back up, so we can't do two-pass parsers like the interleaved alignment
- * formats.
- */
- if (strcmp(filename, "-") == 0)
- {
- dbfp->f = stdin;
- dbfp->do_stdin = TRUE;
- dbfp->do_gzip = FALSE;
- dbfp->fname = sre_strdup("[STDIN]", -1);
- }
-#ifndef SRE_STRICT_ANSI
- /* popen(), pclose() aren't portable to non-POSIX systems; disable */
- else if (Strparse("^.*\\.gz$", filename, 0))
- {
- char cmd[256];
-
- /* Note that popen() will return "successfully"
- * if file doesn't exist, because gzip works fine
- * and prints an error! So we have to check for
- * existence of file ourself.
- */
- if (! FileExists(filename))
- Die("%s: file does not exist", filename);
-
- if (strlen(filename) + strlen("gzip -dc ") >= 256)
- Die("filename > 255 char in SeqfileOpen()");
- sprintf(cmd, "gzip -dc %s", filename);
- if ((dbfp->f = popen(cmd, "r")) == NULL)
- return NULL;
-
- dbfp->do_stdin = FALSE;
- dbfp->do_gzip = TRUE;
- dbfp->fname = sre_strdup(filename, -1);
- }
-#endif /*SRE_STRICT_ANSI*/
- else
- {
- if ((dbfp->f = fopen(filename, "r")) == NULL &&
- (dbfp->f = EnvFileOpen(filename, env, NULL)) == NULL)
- return NULL;
-
- dbfp->do_stdin = FALSE;
- dbfp->do_gzip = FALSE;
- dbfp->fname = sre_strdup(filename, -1);
- }
-
-
- /* Invoke autodetection if we haven't already been told what
- * to expect.
- */
- if (format == SQFILE_UNKNOWN)
- {
- if (dbfp->do_stdin == TRUE || dbfp->do_gzip)
- Die("Can't autodetect sequence file format from a stdin or gzip pipe");
- format = SeqfileFormat(dbfp->f);
- if (format == SQFILE_UNKNOWN)
- Die("Can't determine format of sequence file %s", dbfp->fname);
- }
-
- /* The hack for sequential access of an interleaved alignment file:
- * read the alignment in, we'll copy sequences out one at a time.
- */
- dbfp->msa = NULL;
- dbfp->afp = NULL;
- dbfp->format = format;
- dbfp->linenumber = 0;
- dbfp->buf = NULL;
- dbfp->buflen = 0;
- if (IsAlignmentFormat(format))
- {
- /* We'll be reading from the MSA interface. Copy our data
- * to the MSA afp's structure.
- */
- dbfp->afp = MallocOrDie(sizeof(MSAFILE));
- dbfp->afp->f = dbfp->f; /* just a ptr, don't close */
- dbfp->afp->do_stdin = dbfp->do_stdin;
- dbfp->afp->do_gzip = dbfp->do_gzip;
- dbfp->afp->fname = dbfp->fname; /* just a ptr, don't free */
- dbfp->afp->format = dbfp->format; /* e.g. format */
- dbfp->afp->linenumber = dbfp->linenumber; /* e.g. 0 */
- dbfp->afp->buf = NULL;
- dbfp->afp->buflen = 0;
-
- if ((dbfp->msa = MSAFileRead(dbfp->afp)) == NULL)
- Die("Failed to read any alignment data from file %s", dbfp->fname);
- /* hack: overload/reuse msa->lastidx; indicates
- next seq to return upon a ReadSeq() call */
- dbfp->msa->lastidx = 0;
-
- return dbfp;
- }
-
- /* Load the first line.
- */
- SeqfileGetLine(dbfp);
- return dbfp;
-}
-
-/* Function: SeqfilePosition()
- *
- * Purpose: Move to a particular offset in a seqfile.
- * Will not work on alignment files.
- */
-void
-SeqfilePosition(SQFILE *sqfp, SSIOFFSET *offset)
-{
- if (sqfp->do_stdin || sqfp->do_gzip || IsAlignmentFormat(sqfp->format))
- Die("SeqfilePosition() failed: in a nonrewindable data file or stream");
-
- if (SSISetFilePosition(sqfp->f, offset) != 0)
- Die("SSISetFilePosition failed, but that shouldn't happen.");
- SeqfileGetLine(sqfp);
-}
-
-
-/* Function: SeqfileRewind()
- *
- * Purpose: Set a sequence file back to the first sequence.
- *
- * Won't work on alignment files. Although it would
- * seem that it could (just set msa->lastidx back to 0),
- * that'll fail on "multiple multiple" alignment file formats
- * (e.g. Stockholm).
- */
-void
-SeqfileRewind(SQFILE *sqfp)
-{
- if (sqfp->do_stdin || sqfp->do_gzip)
- Die("SeqfileRewind() failed: in a nonrewindable data file or stream");
-
- rewind(sqfp->f);
- SeqfileGetLine(sqfp);
-}
-
-/* Function: SeqfileLineParameters()
- * Date: SRE, Thu Feb 15 17:00:41 2001 [St. Louis]
- *
- * Purpose: After all the sequences have been read from the file,
- * but before closing it, retrieve overall bytes-per-line and
- * residues-per-line info. If non-zero, these mean that
- * the file contains homogeneous sequence line lengths (except
- * the last line in each record).
- *
- * If either of bpl or rpl is determined to be inhomogeneous,
- * both are returned as 0.
- *
- * Args: *sqfp - an open but fully read sequence file
- * ret_bpl - RETURN: bytes per line, or 0 if inhomogeneous
- * ret_rpl - RETURN: residues per line, or 0 if inhomogenous.
- *
- * Returns: void
- */
-void
-SeqfileLineParameters(SQFILE *V, int *ret_bpl, int *ret_rpl)
-{
- if (V->rpl > 0 && V->maxrpl == V->rpl &&
- V->bpl > 0 && V->maxbpl == V->bpl) {
- *ret_bpl = V->bpl;
- *ret_rpl = V->rpl;
- } else {
- *ret_bpl = 0;
- *ret_rpl = 0;
- }
-}
-
-
-void
-SeqfileClose(SQFILE *sqfp)
-{
- /* note: don't test for sqfp->msa being NULL. Now that
- * we're holding afp open and allowing access to multi-MSA
- * databases (e.g. Stockholm format, Pfam), msa ends
- * up being NULL when we run out of alignments.
- */
- if (sqfp->afp != NULL) {
- if (sqfp->msa != NULL) MSAFree(sqfp->msa);
- if (sqfp->afp->buf != NULL) free(sqfp->afp->buf);
- free(sqfp->afp);
- }
-#ifndef SRE_STRICT_ANSI /* gunzip functionality only on POSIX systems */
- if (sqfp->do_gzip) pclose(sqfp->f);
-#endif
- else if (! sqfp->do_stdin) fclose(sqfp->f);
- if (sqfp->buf != NULL) free(sqfp->buf);
- if (sqfp->fname != NULL) free(sqfp->fname);
- free(sqfp);
-}
-
-
-/* Function: SeqfileGetLine()
- * Date: SRE, Tue Jun 22 09:15:49 1999 [Sanger Centre]
- *
- * Purpose: read a line from a sequence file into V->buf
- * If the fgets() is NULL, sets V->buf[0] to '\0'.
- *
- * Args: V
- *
- * Returns: void
- */
-static void
-SeqfileGetLine(SQFILE *V)
-{
- if (V->ssimode >= 0)
- if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->ssioffset)))
- Die("SSIGetFilePosition() failed");
- if (sre_fgets(&(V->buf), &(V->buflen), V->f) == NULL)
- *(V->buf) = '\0';
- V->linenumber++;
-}
-
-
-void
-FreeSequence(char *seq, SQINFO *sqinfo)
-{
- if (seq != NULL) free(seq);
- if (sqinfo->flags & SQINFO_SS) free(sqinfo->ss);
- if (sqinfo->flags & SQINFO_SA) free(sqinfo->sa);
-}
-
-int
-SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag)
-{
- int len;
- int pos;
-
- /* silently ignore NULL. */
- if (sptr == NULL) return 1;
-
- while (*sptr == ' ') sptr++; /* ignore leading whitespace */
- for (pos = strlen(sptr)-1; pos >= 0; pos--)
- if (! isspace((int) sptr[pos])) break;
- sptr[pos+1] = '\0'; /* ignore trailing whitespace */
-
- switch (flag) {
- case SQINFO_NAME:
- if (*sptr != '-')
- {
- strncpy(sqinfo->name, sptr, SQINFO_NAMELEN-1);
- sqinfo->name[SQINFO_NAMELEN-1] = '\0';
- sqinfo->flags |= SQINFO_NAME;
- }
- break;
-
- case SQINFO_ID:
- if (*sptr != '-')
- {
- strncpy(sqinfo->id, sptr, SQINFO_NAMELEN-1);
- sqinfo->id[SQINFO_NAMELEN-1] = '\0';
- sqinfo->flags |= SQINFO_ID;
- }
- break;
-
- case SQINFO_ACC:
- if (*sptr != '-')
- {
- strncpy(sqinfo->acc, sptr, SQINFO_NAMELEN-1);
- sqinfo->acc[SQINFO_NAMELEN-1] = '\0';
- sqinfo->flags |= SQINFO_ACC;
- }
- break;
-
- case SQINFO_DESC:
- if (*sptr != '-')
- {
- if (sqinfo->flags & SQINFO_DESC) /* append? */
- {
- len = strlen(sqinfo->desc);
- if (len < SQINFO_DESCLEN-2) /* is there room? */
- {
- strncat(sqinfo->desc, " ", SQINFO_DESCLEN-1-len); len++;
- strncat(sqinfo->desc, sptr, SQINFO_DESCLEN-1-len);
- }
- }
- else /* else copy */
- strncpy(sqinfo->desc, sptr, SQINFO_DESCLEN-1);
- sqinfo->desc[SQINFO_DESCLEN-1] = '\0';
- sqinfo->flags |= SQINFO_DESC;
- }
- break;
-
- case SQINFO_START:
- if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
- sqinfo->start = atoi(sptr);
- if (sqinfo->start != 0) sqinfo->flags |= SQINFO_START;
- break;
-
- case SQINFO_STOP:
- if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
- sqinfo->stop = atoi(sptr);
- if (sqinfo->stop != 0) sqinfo->flags |= SQINFO_STOP;
- break;
-
- case SQINFO_OLEN:
- if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
- sqinfo->olen = atoi(sptr);
- if (sqinfo->olen != 0) sqinfo->flags |= SQINFO_OLEN;
- break;
-
- default:
- Die("Invalid flag %d to SetSeqinfoString()", flag);
- }
- return 1;
-}
-
-void
-SeqinfoCopy(SQINFO *sq1, SQINFO *sq2)
-{
- sq1->flags = sq2->flags;
- if (sq2->flags & SQINFO_NAME) strcpy(sq1->name, sq2->name);
- if (sq2->flags & SQINFO_ID) strcpy(sq1->id, sq2->id);
- if (sq2->flags & SQINFO_ACC) strcpy(sq1->acc, sq2->acc);
- if (sq2->flags & SQINFO_DESC) strcpy(sq1->desc, sq2->desc);
- if (sq2->flags & SQINFO_LEN) sq1->len = sq2->len;
- if (sq2->flags & SQINFO_START) sq1->start = sq2->start;
- if (sq2->flags & SQINFO_STOP) sq1->stop = sq2->stop;
- if (sq2->flags & SQINFO_OLEN) sq1->olen = sq2->olen;
- if (sq2->flags & SQINFO_TYPE) sq1->type = sq2->type;
- if (sq2->flags & SQINFO_SS) sq1->ss = Strdup(sq2->ss);
- if (sq2->flags & SQINFO_SA) sq1->sa = Strdup(sq2->sa);
-}
-
-/* Function: ToDNA()
- *
- * Purpose: Convert a sequence to DNA.
- * U --> T
- */
-void
-ToDNA(char *seq)
-{
- for (; *seq != '\0'; seq++)
- {
- if (*seq == 'U') *seq = 'T';
- else if (*seq == 'u') *seq = 't';
- }
-}
-
-/* Function: ToRNA()
- *
- * Purpose: Convert a sequence to RNA.
- * T --> U
- */
-void
-ToRNA(char *seq)
-{
- for (; *seq != '\0'; seq++)
- {
- if (*seq == 'T') *seq = 'U';
- else if (*seq == 't') *seq = 'u';
- }
-}
-
-
-/* Function: ToIUPAC()
- *
- * Purpose: Convert X's, o's, other junk in a nucleic acid sequence to N's,
- * to comply with IUPAC code. If is_aseq is TRUE, will allow gap
- * characters though, so we can call ToIUPAC() on aligned seqs.
- *
- * NUCLEOTIDES is defined in squid.h as:
- * "ACGTUNRYMKSWHBVDacgtunrymkswhbvd"
- * gap chars allowed by isgap() are defined in squid.h as:
- * " ._-~"
- *
- * WU-BLAST's pressdb will
- * choke on X's, for instance, necessitating conversion
- * of certain genome centers' data.
- */
-void
-ToIUPAC(char *seq, int is_aseq)
-{
- if (is_aseq) {
- for (; *seq != '\0'; seq++)
- if (strchr(NUCLEOTIDES, *seq) == NULL && ! isgap(*seq)) *seq = 'N';
- } else {
- for (; *seq != '\0'; seq++)
- if (strchr(NUCLEOTIDES, *seq) == NULL) *seq = 'N';
- }
-}
-
-
-/* Function: addseq()
- *
- * Purpose: Add a line of sequence to the growing string in V.
- *
- * In the seven supported unaligned formats, all sequence
- * lines may contain whitespace that must be filtered out;
- * four formats (PIR, EMBL, Genbank, GCG) include coordinates
- * that must be filtered out. Thus an (!isdigit && !isspace)
- * test on each character before we accept it.
- */
-static void
-addseq(char *s, struct ReadSeqVars *V)
-{
- char *s0;
- char *sq;
- int rpl; /* valid residues per line */
- int bpl; /* characters per line */
-
- if (V->ssimode == -1)
- { /* Normal mode: keeping the seq */
- /* Make sure we have enough room. We know that s is <= buflen,
- * so just make sure we've got room for a whole new buflen worth
- * of sequence.
- */
- if (V->seqlen + V->buflen > V->maxseq) {
- V->maxseq += MAX(V->buflen, kStartLength);
- V->seq = ReallocOrDie (V->seq, V->maxseq+1);
- }
-
- sq = V->seq + V->seqlen;
- while (*s != 0) {
- if (! isdigit((int) *s) && ! isspace((int) *s)) {
- *sq = *s;
- sq++;
- }
- s++;
- }
- V->seqlen = sq - V->seq;
- }
- else /* else: indexing mode, discard the seq */
- {
- s0 = s;
- rpl = 0;
- while (*s != 0) {
- if (! isdigit((int) *s) && ! isspace((int) *s)) {
- rpl++;
- }
- s++;
- }
- V->seqlen += rpl;
- bpl = s - s0;
-
- /* Keep track of the global rpl, bpl for the file.
- * This is overly complicated because we have to
- * allow the last line of each record (e.g. the last addseq() call
- * on each sequence) to have a different length - and sometimes
- * we'll have one-line sequence records, too. Thus we only
- * do something with the global V->rpl when we have *passed over*
- * a line - we keep the last line's rpl in last_rpl. And because
- * a file might consist entirely of single-line records, we keep
- * a third guy, maxrpl, that tells us the maximum rpl of any line
- * in the file. If we reach the end of file and rpl is still unset,
- * we'll set it to maxrpl. If we reach eof and rpl is set, but is
- * less than maxrpl, that's a weird case where a last line in some
- * record is longer than every other line.
- */
- if (V->rpl != 0) { /* 0 means we already know rpl is invalid */
- if (V->lastrpl > 0) { /* we're on something that's not the first line */
- if (V->rpl > 0 && V->lastrpl != V->rpl) V->rpl = 0;
- else if (V->rpl == -1) V->rpl = V->lastrpl;
- }
- V->lastrpl = rpl;
- if (rpl > V->maxrpl) V->maxrpl = rpl; /* make sure we check max length of final lines */
- }
- if (V->bpl != 0) { /* 0 means we already know bpl is invalid */
- if (V->lastbpl > 0) { /* we're on something that's not the first line */
- if (V->bpl > 0 && V->lastbpl != V->bpl) V->bpl = 0;
- else if (V->bpl == -1) V->bpl = V->lastbpl;
- }
- V->lastbpl = bpl;
- if (bpl > V->maxbpl) V->maxbpl = bpl; /* make sure we check max length of final lines */
- }
- } /* end of indexing mode of addseq(). */
-
-}
-
-static void
-readLoop(int addfirst, int (*endTest)(char *,int *), struct ReadSeqVars *V)
-{
- int addend = 0;
- int done = 0;
-
- V->seqlen = 0;
- V->lastrpl = V->lastbpl = 0;
- if (addfirst) {
- if (V->ssimode >= 0) V->d_off = V->ssioffset;
- addseq(V->buf, V);
- } else if (V->ssimode >= 0)
- if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->d_off)))
- Die("SSIGetFilePosition() failed");
-
- do {
- SeqfileGetLine(V);
- /* feof() alone is a bug; files not necessarily \n terminated */
- if (*(V->buf) == '\0' && feof(V->f))
- done = TRUE;
- done |= (*endTest)(V->buf, &addend);
- if (addend || !done)
- addseq(V->buf, V);
- } while (!done);
-}
-
-
-static int
-endPIR(char *s, int *addend)
-{
- *addend = 0;
- if ((strncmp(s, "///", 3) == 0) ||
- (strncmp(s, "ENTRY", 5) == 0))
- return 1;
- else
- return 0;
-}
-
-static void
-readPIR(struct ReadSeqVars *V)
-{
- char *sptr;
- /* load first line of entry */
- while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {
- SeqfileGetLine(V);
- }
- if (feof(V->f)) return;
- if (V->ssimode >= 0) V->r_off = V->ssioffset;
-
- if ((sptr = strtok(V->buf + 15, "\n\t ")) != NULL)
- {
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
- }
- do {
- SeqfileGetLine(V);
- if (!feof(V->f) && strncmp(V->buf, "TITLE", 5) == 0)
- SetSeqinfoString(V->sqinfo, V->buf+15, SQINFO_DESC);
- else if (!feof(V->f) && strncmp(V->buf, "ACCESSION", 9) == 0)
- {
- if ((sptr = strtok(V->buf+15, " \t\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
- }
- } while (! feof(V->f) && (strncmp(V->buf,"SEQUENCE", 8) != 0));
- SeqfileGetLine(V); /* skip next line, coords */
-
- readLoop(0, endPIR, V);
-
- /* reading a real PIR-CODATA database file, we keep the source coords
- */
- V->sqinfo->start = 1;
- V->sqinfo->stop = V->seqlen;
- V->sqinfo->olen = V->seqlen;
- V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
-
- /* get next line
- */
- while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {
- SeqfileGetLine(V);
- }
-}
-
-
-
-static int
-endIG(char *s, int *addend)
-{
- *addend = 1; /* 1 or 2 occur in line w/ bases */
- return((strchr(s,'1')!=NULL) || (strchr(s,'2')!=NULL));
-}
-
-static void
-readIG(struct ReadSeqVars *V)
-{
- char *nm;
- /* position past ';' comments */
- do {
- SeqfileGetLine(V);
- } while (! (feof(V->f) || ((*V->buf != 0) && (*V->buf != ';')) ));
-
- if (!feof(V->f))
- {
- if ((nm = strtok(V->buf, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);
-
- readLoop(0, endIG, V);
- }
-
- while (!(feof(V->f) || ((*V->buf != '\0') && (*V->buf == ';'))))
- SeqfileGetLine(V);
-}
-
-static int
-endStrider(char *s, int *addend)
-{
- *addend = 0;
- return (strstr( s, "//") != NULL);
-}
-
-static void
-readStrider(struct ReadSeqVars *V)
-{
- char *nm;
-
- while ((!feof(V->f)) && (*V->buf == ';'))
- {
- if (strncmp(V->buf,"; DNA sequence", 14) == 0)
- {
- if ((nm = strtok(V->buf+16, ",\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);
- }
- SeqfileGetLine(V);
- }
-
- if (! feof(V->f))
- readLoop(1, endStrider, V);
-
- /* load next line
- */
- while ((!feof(V->f)) && (*V->buf != ';'))
- SeqfileGetLine(V);
-}
-
-
-static int
-endGB(char *s, int *addend)
-{
- *addend = 0;
- return ((strstr(s,"//") != NULL) || (strstr(s,"LOCUS") == s));
-}
-
-static void
-readGenBank(struct ReadSeqVars *V)
-{
- char *sptr;
- int in_definition;
-
- /* We'll map three genbank identifiers onto names:
- * LOCUS -> sqinfo.name
- * ACCESSION -> sqinfo.acc [primary accession only]
- * VERSION -> sqinfo.id
- * We don't currently store the GI number, or secondary accessions.
- */
- while (strncmp(V->buf, "LOCUS", 5) != 0) {
- SeqfileGetLine(V);
- }
- if (V->ssimode >= 0) V->r_off = V->ssioffset;
-
- if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
-
- in_definition = FALSE;
- while (! feof(V->f))
- {
- SeqfileGetLine(V);
- if (! feof(V->f) && strstr(V->buf, "DEFINITION") == V->buf)
- {
- if ((sptr = strtok(V->buf+12, "\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
- in_definition = TRUE;
- }
- else if (! feof(V->f) && strstr(V->buf, "ACCESSION") == V->buf)
- {
- if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
- in_definition = FALSE;
- }
- else if (! feof(V->f) && strstr(V->buf, "VERSION") == V->buf)
- {
- if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
- in_definition = FALSE;
- }
- else if (strncmp(V->buf,"ORIGIN", 6) != 0)
- {
- if (in_definition)
- SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);
- }
- else
- break;
- }
-
- readLoop(0, endGB, V);
-
- /* reading a real GenBank database file, we keep the source coords
- */
- V->sqinfo->start = 1;
- V->sqinfo->stop = V->seqlen;
- V->sqinfo->olen = V->seqlen;
- V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
-
-
- while (!(feof(V->f) || ((*V->buf!=0) && (strstr(V->buf,"LOCUS") == V->buf))))
- SeqfileGetLine(V);
- /* SRE: V->s now holds "//", so sequential
- reads are wedged: fixed Tue Jul 13 1993 */
- while (!feof(V->f) && strstr(V->buf, "LOCUS ") != V->buf)
- SeqfileGetLine(V);
-}
-
-static int
-endGCGdata(char *s, int *addend)
-{
- *addend = 0;
- return (*s == '>');
-}
-
-static void
-readGCGdata(struct ReadSeqVars *V)
-{
- int binary = FALSE; /* whether data are binary or not */
- int blen = 0; /* length of binary sequence */
-
- /* first line contains ">>>>" followed by name */
- if (Strparse(">>>>([^ ]+) .+2BIT +Len: ([0-9]+)", V->buf, 2))
- {
- binary = TRUE;
- SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);
- blen = atoi(sqd_parse[2]);
- }
- else if (Strparse(">>>>([^ ]+) .+ASCII +Len: [0-9]+", V->buf, 1))
- SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);
- else
- Die("bogus GCGdata format? %s", V->buf);
-
- /* second line contains free text description */
- SeqfileGetLine(V);
- SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);
-
- if (binary) {
- /* allocate for blen characters +3... (allow for 3 bytes of slop) */
- if (blen >= V->maxseq) {
- V->maxseq = blen;
- if ((V->seq = (char *) realloc (V->seq, sizeof(char)*(V->maxseq+4)))==NULL)
- Die("malloc failed");
- }
- /* read (blen+3)/4 bytes from file */
- if (fread(V->seq, sizeof(char), (blen+3)/4, V->f) < (size_t) ((blen+3)/4))
- Die("fread failed");
- V->seqlen = blen;
- /* convert binary code to seq */
- GCGBinaryToSequence(V->seq, blen);
- }
- else readLoop(0, endGCGdata, V);
-
- while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>'))))
- SeqfileGetLine(V);
-}
-
-static int
-endPearson(char *s, int *addend)
-{
- *addend = 0;
- return(*s == '>');
-}
-
-static void
-readPearson(struct ReadSeqVars *V)
-{
- char *sptr;
-
- if (V->ssimode >= 0) V->r_off = V->ssioffset;
-
- if (*V->buf != '>')
- Die("\
-File %s does not appear to be in FASTA format at line %d.\n\
-You may want to specify the file format on the command line.\n\
-Usually this is done with an option --informat <fmt>.\n",
- V->fname, V->linenumber);
-
- if ((sptr = strtok(V->buf+1, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
- if ((sptr = strtok(NULL, "\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
-
- readLoop(0, endPearson, V);
-
- while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>')))) {
- SeqfileGetLine(V);
- }
-}
-
-
-static int
-endEMBL(char *s, int *addend)
-{
- *addend = 0;
- /* Some people (Berlin 5S rRNA database, f'r instance) use
- * an extended EMBL format that attaches extra data after
- * the sequence -- watch out for that. We use the fact that
- * real EMBL sequence lines begin with five spaces.
- *
- * We can use this as the sole end test because readEMBL() will
- * advance to the next ID line before starting to read again.
- */
- return (strncmp(s," ",5) != 0);
-/* return ((strstr(s,"//") != NULL) || (strstr(s,"ID ") == s)); */
-}
-
-static void
-readEMBL(struct ReadSeqVars *V)
-{
- char *sptr;
- int i;
-
- /* make sure we have first line */
- while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) {
- SeqfileGetLine(V);
- }
- if (V->ssimode >= 0) V->r_off = V->ssioffset;
-
- if ((sptr = strtok(V->buf+5, "\n\t ")) != NULL)
- {
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
- }
-
- do {
- SeqfileGetLine(V);
- if (!feof(V->f) && strstr(V->buf, "AC ") == V->buf)
- {
- if ((sptr = strtok(V->buf+5, "; \t\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
- }
- else if (!feof(V->f) && strstr(V->buf, "DE ") == V->buf)
- {
- if ((sptr = strtok(V->buf+5, "\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
- }
- } while (! feof(V->f) && strncmp(V->buf,"SQ",2) != 0);
-
- readLoop(0, endEMBL, V);
-
- /* Hack for Staden experiment files: convert - to N.
- *
- * You may not treat V->seq as a string yet; it is not null terminated
- * until we return and ReadSeq() polishes it off.
- * [bug #h25; xref STL7 p.121]
- */
- if (V->ssimode == -1) /* if we're in ssi mode, we're not keeping the seq */
- for (i = 0; i < V->seqlen; i++)
- if (V->seq[i] == '-') V->seq[i] = 'N';
-
- /* reading a real EMBL database file, we keep the source coords
- */
- V->sqinfo->start = 1;
- V->sqinfo->stop = V->seqlen;
- V->sqinfo->olen = V->seqlen;
- V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
-
- /* load next record's ID line */
- while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) {
- SeqfileGetLine(V);
- }
-
-}
-
-
-static int
-endZuker(char *s, int *addend)
-{
- *addend = 0;
- return( *s == '(' );
-}
-
-static void
-readZuker(struct ReadSeqVars *V)
-{
- char *sptr;
-
- SeqfileGetLine(V); /*s == "seqLen seqid string..."*/
-
- if ((sptr = strtok(V->buf+6, " \t\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
-
- if ((sptr = strtok(NULL, "\n")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
-
- readLoop(0, endZuker, V);
-
- while (!(feof(V->f) | ((*V->buf != '\0') & (*V->buf == '('))))
- SeqfileGetLine(V);
-}
-
-static void
-readUWGCG(struct ReadSeqVars *V)
-{
- char *si;
- char *sptr;
- int done;
-
- V->seqlen = 0;
-
- /*writeseq: " %s Length: %d (today) Check: %d ..\n" */
- /*drop above or ".." from id*/
- if ((si = strstr(V->buf," Length: ")) != NULL) *si = 0;
- else if ((si = strstr(V->buf,"..")) != NULL) *si = 0;
-
- if ((sptr = strtok(V->buf, "\n\t ")) != NULL)
- SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
-
- do {
- done = feof(V->f);
- SeqfileGetLine(V);
- if (! done) addseq(V->buf, V);
- } while (!done);
-}
-
-
-/* Function: ReadSeq()
- *
- * Purpose: Read next sequence from an open database file.
- * Return the sequence and associated info.
- *
- * Args: fp - open sequence database file pointer
- * format - format of the file (previously determined
- * by call to SeqfileFormat()).
- * Currently unused, since we carry it in V.
- * ret_seq - RETURN: sequence
- * sqinfo - RETURN: filled in w/ other information
- *
- * Limitations: uses squid_errno, so it's not threadsafe.
- *
- * Return: 1 on success, 0 on failure.
- * ret_seq and some field of sqinfo are allocated here,
- * The preferred call mechanism to properly free the memory is:
- *
- * SQINFO sqinfo;
- * char *seq;
- *
- * ReadSeq(fp, format, &seq, &sqinfo);
- * ... do something...
- * FreeSequence(seq, &sqinfo);
- */
-int
-ReadSeq(SQFILE *V, int format, char **ret_seq, SQINFO *sqinfo)
-{
- int gotuw;
-
- squid_errno = SQERR_OK;
-
- /* Here's the hack for sequential access of sequences from
- * the multiple sequence alignment formats
- */
- if (IsAlignmentFormat(V->format))
- {
- if (V->msa->lastidx >= V->msa->nseq)
- { /* out of data. try to read another alignment */
- MSAFree(V->msa);
- if ((V->msa = MSAFileRead(V->afp)) == NULL)
- return 0;
- V->msa->lastidx = 0;
- }
- /* copy and dealign the appropriate aligned seq */
- MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
- V->msa->aseq[V->msa->lastidx], &(V->seq));
- V->seqlen = strlen(V->seq);
-
- /* Extract sqinfo stuff for this sequence from the msa.
- * Tedious; code that should be cleaned.
- */
- sqinfo->flags = 0;
- if (V->msa->sqname[V->msa->lastidx] != NULL)
- SetSeqinfoString(sqinfo, V->msa->sqname[V->msa->lastidx], SQINFO_NAME);
- if (V->msa->sqacc != NULL && V->msa->sqacc[V->msa->lastidx] != NULL)
- SetSeqinfoString(sqinfo, V->msa->sqacc[V->msa->lastidx], SQINFO_ACC);
- if (V->msa->sqdesc != NULL && V->msa->sqdesc[V->msa->lastidx] != NULL)
- SetSeqinfoString(sqinfo, V->msa->sqdesc[V->msa->lastidx], SQINFO_DESC);
- if (V->msa->ss != NULL && V->msa->ss[V->msa->lastidx] != NULL) {
- MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
- V->msa->ss[V->msa->lastidx], &(sqinfo->ss));
- sqinfo->flags |= SQINFO_SS;
- }
- if (V->msa->sa != NULL && V->msa->sa[V->msa->lastidx] != NULL) {
- MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
- V->msa->sa[V->msa->lastidx], &(sqinfo->sa));
- sqinfo->flags |= SQINFO_SA;
- }
- V->msa->lastidx++;
- }
- else {
- if (feof(V->f)) return 0;
-
- if (V->ssimode == -1) { /* normal mode */
- V->seq = (char*) calloc (kStartLength+1, sizeof(char));
- V->maxseq = kStartLength;
- } else { /* index mode: discarding seq */
- V->seq = NULL;
- V->maxseq = 0;
- }
- V->seqlen = 0;
- V->sqinfo = sqinfo;
- V->sqinfo->flags = 0;
-
- switch (V->format) {
- case SQFILE_IG : readIG(V); break;
- case SQFILE_STRIDER : readStrider(V); break;
- case SQFILE_GENBANK : readGenBank(V); break;
- case SQFILE_FASTA : readPearson(V); break;
- case SQFILE_EMBL : readEMBL(V); break;
- case SQFILE_ZUKER : readZuker(V); break;
- case SQFILE_PIR : readPIR(V); break;
- case SQFILE_GCGDATA : readGCGdata(V); break;
-
- case SQFILE_GCG :
- do { /* skip leading comments on GCG file */
- gotuw = (strstr(V->buf,"..") != NULL);
- if (gotuw) readUWGCG(V);
- SeqfileGetLine(V);
- } while (! feof(V->f));
- break;
-
- case SQFILE_IDRAW: /* SRE: no attempt to read idraw postscript */
- default:
- squid_errno = SQERR_FORMAT;
- free(V->seq);
- return 0;
- }
- if (V->seq != NULL) /* (yes, it can be NULL, in indexing mode) */
- V->seq[V->seqlen] = '\0'; /* stick a string terminator on it */
- }
-
- /* Cleanup
- */
- sqinfo->len = V->seqlen;
- sqinfo->flags |= SQINFO_LEN;
- *ret_seq = V->seq;
- if (squid_errno == SQERR_OK) return 1; else return 0;
-}
-
-/* Function: SeqfileFormat()
- * Date: SRE, Tue Jun 22 10:58:58 1999 [Sanger Centre]
- *
- * Purpose: Determine format of an open file.
- * Returns format code.
- * Rewinds the file.
- *
- * Autodetects the following unaligned formats:
- * SQFILE_FASTA
- * SQFILE_GENBANK
- * SQFILE_EMBL
- * SQFILE_GCG
- * SQFILE_GCGDATA
- * SQFILE_PIR
- * Also autodetects the following alignment formats:
- * MSAFILE_STOCKHOLM
- * MSAFILE_MSF
- * MSAFILE_CLUSTAL
- * MSAFILE_SELEX
- * MSAFILE_PHYLIP
- *
- * Can't autodetect MSAFILE_A2M, calls it SQFILE_FASTA.
- * MSAFileFormat() does the opposite.
- *
- * Args: sfp - open SQFILE
- *
- * Return: format code, or SQFILE_UNKNOWN if unrecognized
- */
-int
-SeqfileFormat(FILE *fp)
-{
- char *buf;
- int len;
- int fmt = SQFILE_UNKNOWN;
- int ndataline;
- char *bufcpy, *s, *s1, *s2;
- int has_junk;
-
- buf = NULL;
- len = 0;
- ndataline = 0;
- has_junk = FALSE;
- while (sre_fgets(&buf, &len, fp) != NULL)
- {
- if (IsBlankline(buf)) continue;
-
- /* Well-behaved formats identify themselves in first nonblank line.
- */
- if (ndataline == 0)
- {
- if (strncmp(buf, ">>>>", 4) == 0 && strstr(buf, "Len: "))
- { fmt = SQFILE_GCGDATA; goto DONE; }
-
- if (buf[0] == '>')
- { fmt = SQFILE_FASTA; goto DONE; }
-
- if (strncmp(buf, "!!AA_SEQUENCE", 13) == 0 ||
- strncmp(buf, "!!NA_SEQUENCE", 13) == 0)
- { fmt = SQFILE_GCG; goto DONE; }
-
- if (strncmp(buf, "# STOCKHOLM 1.", 14) == 0)
- { fmt = MSAFILE_STOCKHOLM; goto DONE; }
-
- if (strncmp(buf, "CLUSTAL", 7) == 0 &&
- strstr(buf, "multiple sequence alignment") != NULL)
- { fmt = MSAFILE_CLUSTAL; goto DONE; }
-
- if (strncmp(buf, "!!AA_MULTIPLE_ALIGNMENT", 23) == 0 ||
- strncmp(buf, "!!NA_MULTIPLE_ALIGNMENT", 23) == 0)
- { fmt = MSAFILE_MSF; goto DONE; }
-
- /* PHYLIP id: also just a good bet */
- bufcpy = sre_strdup(buf, -1);
- s = bufcpy;
- if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) != NULL &&
- (s2 = sre_strtok(&s, WHITESPACE, NULL)) != NULL &&
- IsInt(s1) &&
- IsInt(s2))
- { free(bufcpy); fmt = MSAFILE_PHYLIP; goto DONE; }
- free(bufcpy);
- }
-
- /* We trust that other formats identify themselves soon.
- */
- /* dead giveaways for extended SELEX */
- if (strncmp(buf, "#=AU", 4) == 0 ||
- strncmp(buf, "#=ID", 4) == 0 ||
- strncmp(buf, "#=AC", 4) == 0 ||
- strncmp(buf, "#=DE", 4) == 0 ||
- strncmp(buf, "#=GA", 4) == 0 ||
- strncmp(buf, "#=TC", 4) == 0 ||
- strncmp(buf, "#=NC", 4) == 0 ||
- strncmp(buf, "#=SQ", 4) == 0 ||
- strncmp(buf, "#=SS", 4) == 0 ||
- strncmp(buf, "#=CS", 4) == 0 ||
- strncmp(buf, "#=RF", 4) == 0)
- { fmt = MSAFILE_SELEX; goto DONE; }
-
- if (strncmp(buf, "///", 3) == 0 || strncmp(buf, "ENTRY ", 6) == 0)
- { fmt = SQFILE_PIR; goto DONE; }
-
- /* a ha, diagnostic of an (old) MSF file */
- if ((strstr(buf, "..") != NULL) &&
- (strstr(buf, "MSF:") != NULL) &&
- (strstr(buf, "Check:")!= NULL))
- { fmt = MSAFILE_MSF; goto DONE; }
-
- /* unaligned GCG (must follow MSF test!) */
- if (strstr(buf, " Check: ") != NULL && strstr(buf, "..") != NULL)
- { fmt = SQFILE_GCG; goto DONE; }
-
- if (strncmp(buf,"LOCUS ",6) == 0 || strncmp(buf,"ORIGIN ",6) == 0)
- { fmt = SQFILE_GENBANK; goto DONE; }
-
- if (strncmp(buf,"ID ",5) == 0 || strncmp(buf,"SQ ",5) == 0)
- { fmt = SQFILE_EMBL; goto DONE; }
-
- /* But past here, we're being desperate. A simple SELEX file is
- * very difficult to detect; we can only try to disprove it.
- */
- s = buf;
- if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue; /* skip blank lines */
- if (strchr("#%", *s1) != NULL) continue; /* skip comment lines */
-
- /* Disproof 1. Noncomment, nonblank lines in a SELEX file
- * must have at least two space-delimited fields (name/seq)
- */
- if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL)
- has_junk = TRUE;
-
- /* Disproof 2.
- * The sequence field should look like a sequence.
- */
- if (s2 != NULL && Seqtype(s2) == kOtherSeq)
- has_junk = TRUE;
-
- ndataline++;
- if (ndataline == 300) break; /* only look at first 300 lines */
- }
-
- if (ndataline == 0)
- Die("Sequence file contains no data");
-
- /* If we've made it this far, we've run out of data, but there
- * was at least one line of it; check if we've
- * disproven SELEX. If not, cross our fingers, pray, and guess SELEX.
- */
- if (has_junk == TRUE) fmt = SQFILE_UNKNOWN;
- else fmt = MSAFILE_SELEX;
-
- DONE:
- if (buf != NULL) free(buf);
- rewind(fp);
- return fmt;
-}
-
-/* Function: GCGBinaryToSequence()
- *
- * Purpose: Convert a GCG 2BIT binary string to DNA sequence.
- * 0 = C 1 = T 2 = A 3 = G
- * 4 nts/byte
- *
- * Args: seq - binary sequence. Converted in place to DNA.
- * len - length of DNA. binary is (len+3)/4 bytes
- */
-int
-GCGBinaryToSequence(char *seq, int len)
-{
- int bpos; /* position in binary */
- int spos; /* position in sequence */
- char twobit;
- int i;
-
- for (bpos = (len-1)/4; bpos >= 0; bpos--)
- {
- twobit = seq[bpos];
- spos = bpos*4;
-
- for (i = 3; i >= 0; i--)
- {
- switch (twobit & 0x3) {
- case 0: seq[spos+i] = 'C'; break;
- case 1: seq[spos+i] = 'T'; break;
- case 2: seq[spos+i] = 'A'; break;
- case 3: seq[spos+i] = 'G'; break;
- }
- twobit = twobit >> 2;
- }
- }
- seq[len] = '\0';
- return 1;
-}
-
-
-/* Function: GCGchecksum()
- * Date: SRE, Mon May 31 11:13:21 1999 [St. Louis]
- *
- * Purpose: Calculate a GCG checksum for a sequence.
- * Code provided by Steve Smith of Genetics
- * Computer Group.
- *
- * Args: seq - sequence to calculate checksum for.
- * may contain gap symbols.
- * len - length of sequence (usually known,
- * so save a strlen() call)
- *
- * Returns: GCG checksum.
- */
-int
-GCGchecksum(char *seq, int len)
-{
- int i; /* position in sequence */
- int chk = 0; /* calculated checksum */
-
- for (i = 0; i < len; i++)
- chk = (chk + (i % 57 + 1) * (sre_toupper((int) seq[i]))) % 10000;
- return chk;
-}
-
-
-/* Function: GCGMultchecksum()
- *
- * Purpose: GCG checksum for a multiple alignment: sum of
- * individual sequence checksums (including their
- * gap characters) modulo 10000.
- *
- * Implemented using spec provided by Steve Smith of
- * Genetics Computer Group.
- *
- * Args: seqs - sequences to be checksummed; aligned or not
- * nseq - number of sequences
- *
- * Return: the checksum, a number between 0 and 9999
- */
-int
-GCGMultchecksum(char **seqs, int nseq)
-{
- int chk = 0;
- int idx;
-
- for (idx = 0; idx < nseq; idx++)
- chk = (chk + GCGchecksum(seqs[idx], strlen(seqs[idx]))) % 10000;
- return chk;
-}
-
-
-
-
-/* Function: Seqtype()
- *
- * Purpose: Returns a (very good) guess about type of sequence:
- * kDNA, kRNA, kAmino, or kOtherSeq.
- *
- * Modified from, and replaces, Gilbert getseqtype().
- */
-int
-Seqtype(char *seq)
-{
- int saw; /* how many non-gap characters I saw */
- char c;
- int po = 0; /* count of protein-only */
- int nt = 0; /* count of t's */
- int nu = 0; /* count of u's */
- int na = 0; /* count of nucleotides */
- int aa = 0; /* count of amino acids */
- int no = 0; /* count of others */
-
- /* Look at the first 300 non-gap characters
- */
- for (saw = 0; *seq != '\0' && saw < 300; seq++)
- {
- c = sre_toupper((int) *seq);
- if (! isgap(c))
- {
- if (strchr(protonly, c)) po++;
- else if (strchr(primenuc,c)) {
- na++;
- if (c == 'T') nt++;
- else if (c == 'U') nu++;
- }
- else if (strchr(aminos,c)) aa++;
- else if (isalpha((int) c)) no++;
- saw++;
- }
- }
-
- if (no > 0) return kOtherSeq;
- else if (po > 0) return kAmino;
- else if (na > aa) {
- if (nu > nt) return kRNA;
- else return kDNA;
- }
- else return kAmino; /* ooooh. risky. */
-}
-
-
-/* Function: GuessAlignmentSeqtype()
- * Date: SRE, Wed Jul 7 09:42:34 1999 [St. Louis]
- *
- * Purpose: Try to guess whether an alignment is protein
- * or nucleic acid; return a code for the
- * type (kRNA, kDNA, or kAmino).
- *
- * Args: aseq - array of aligned sequences. (Could also
- * be an rseq unaligned sequence array)
- * nseq - number of aseqs
- *
- * Returns: kRNA, kDNA, kAmino;
- * kOtherSeq if inconsistency is detected.
- */
-int
-GuessAlignmentSeqtype(char **aseq, int nseq)
-{
- int idx;
- int nrna = 0;
- int ndna = 0;
- int namino = 0;
- int nother = 0;
-
- for (idx = 0; idx < nseq; idx++)
- switch (Seqtype(aseq[idx])) {
- case kRNA: nrna++; break;
- case kDNA: ndna++; break;
- case kAmino: namino++; break;
- default: nother++;
- }
-
- /* Unambiguous decisions:
- */
- if (nother) return kOtherSeq;
- if (namino == nseq) return kAmino;
- if (ndna == nseq) return kDNA;
- if (nrna == nseq) return kRNA;
-
- /* Ambiguous decisions:
- */
- if (namino == 0) return kRNA; /* it's nucleic acid, but seems mixed RNA/DNA */
- return kAmino; /* some amino acid seen; others probably short seqs, some
- of which may be entirely ACGT (ala,cys,gly,thr). We
- could be a little more sophisticated: U would be a giveaway
- that we're not in protein seqs */
-}
-
-/* Function: WriteSimpleFASTA()
- * Date: SRE, Tue Nov 16 18:06:00 1999 [St. Louis]
- *
- * Purpose: Just write a FASTA format sequence to a file;
- * minimal interface, mostly for quick and dirty programs.
- *
- * Args: fp - open file handle (stdout, possibly)
- * seq - sequence to output
- * name - name for the sequence
- * desc - optional description line, or NULL.
- *
- * Returns: void
- */
-void
-WriteSimpleFASTA(FILE *fp, char *seq, char *name, char *desc)
-{
- char buf[61];
- int len;
- int pos;
-
- len = strlen(seq);
- buf[60] = '\0';
- fprintf(fp, ">%s %s\n", name, desc != NULL ? desc : "");
- for (pos = 0; pos < len; pos += 60)
- {
- strncpy(buf, seq+pos, 60);
- fprintf(fp, "%s\n", buf);
- }
-}
-
-int
-WriteSeq(FILE *outf, int outform, char *seq, SQINFO *sqinfo)
-{
- int numline = 0;
- int lines = 0, spacer = 0, width = 50, tab = 0;
- int i, j, l, l1, ibase;
- char endstr[10];
- char s[100]; /* buffer for sequence */
- char ss[100]; /* buffer for structure */
- int checksum = 0;
- int seqlen;
- int which_case; /* 0 = do nothing. 1 = upper case. 2 = lower case */
- int dostruc; /* TRUE to print structure lines*/
-
- which_case = 0;
- dostruc = FALSE;
- seqlen = (sqinfo->flags & SQINFO_LEN) ? sqinfo->len : strlen(seq);
-
- if (IsAlignmentFormat(outform))
- Die("Tried to write an aligned format with WriteSeq() -- bad, bad.");
-
-
- strcpy( endstr,"");
- l1 = 0;
- checksum = GCGchecksum(seq, seqlen);
-
- switch (outform) {
- case SQFILE_UNKNOWN: /* no header, just sequence */
- strcpy(endstr,"\n"); /* end w/ extra blank line */
- break;
-
- case SQFILE_GENBANK:
- fprintf(outf,"LOCUS %s %d bp\n",
- sqinfo->name, seqlen);
- fprintf(outf,"ACCESSION %s\n",
- (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : ".");
- fprintf(outf,"DEFINITION %s\n",
- (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : ".");
- fprintf(outf,"VERSION %s\n",
- (sqinfo->flags & SQINFO_ID) ? sqinfo->id : ".");
- fprintf(outf,"ORIGIN \n");
- spacer = 11;
- numline = 1;
- strcpy(endstr, "\n//");
- break;
-
- case SQFILE_GCGDATA:
- fprintf(outf, ">>>>%s 9/95 ASCII Len: %d\n", sqinfo->name, seqlen);
- fprintf(outf, "%s\n", (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
- break;
-
- case SQFILE_PIR:
- fprintf(outf, "ENTRY %s\n",
- (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);
- fprintf(outf, "TITLE %s\n",
- (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
- fprintf(outf, "ACCESSION %s\n",
- (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");
- fprintf(outf, "SUMMARY #Length %d #Checksum %d\n",
- sqinfo->len, checksum);
- fprintf(outf, "SEQUENCE\n");
- fprintf(outf, " 5 10 15 20 25 30\n");
- spacer = 2; /* spaces after every residue */
- numline = 1; /* number lines w/ coords */
- width = 30; /* 30 aa per line */
- strcpy(endstr, "\n///");
- break;
-
- case SQFILE_SQUID:
- fprintf(outf, "NAM %s\n", sqinfo->name);
- if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN))
- fprintf(outf, "SRC %s %s %d..%d::%d\n",
- (sqinfo->flags & SQINFO_ID) ? sqinfo->id : "-",
- (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-",
- (sqinfo->flags & SQINFO_START) ? sqinfo->start : 0,
- (sqinfo->flags & SQINFO_STOP) ? sqinfo->stop : 0,
- (sqinfo->flags & SQINFO_OLEN) ? sqinfo->olen : 0);
- if (sqinfo->flags & SQINFO_DESC)
- fprintf(outf, "DES %s\n", sqinfo->desc);
- if (sqinfo->flags & SQINFO_SS)
- {
- fprintf(outf, "SEQ +SS\n");
- dostruc = TRUE; /* print structure lines too */
- }
- else
- fprintf(outf, "SEQ\n");
- numline = 1; /* number seq lines w/ coords */
- strcpy(endstr, "\n++");
- break;
-
- case SQFILE_EMBL:
- fprintf(outf,"ID %s\n",
- (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);
- fprintf(outf,"AC %s\n",
- (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");
- fprintf(outf,"DE %s\n",
- (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
- fprintf(outf,"SQ %d BP\n", seqlen);
- strcpy(endstr, "\n//"); /* 11Oct90: bug fix*/
- tab = 5; /** added 31jan91 */
- spacer = 11; /** added 31jan91 */
- break;
-
- case SQFILE_GCG:
- fprintf(outf,"%s\n", sqinfo->name);
- if (sqinfo->flags & SQINFO_ACC)
- fprintf(outf,"ACCESSION %s\n", sqinfo->acc);
- if (sqinfo->flags & SQINFO_DESC)
- fprintf(outf,"DEFINITION %s\n", sqinfo->desc);
- fprintf(outf," %s Length: %d (today) Check: %d ..\n",
- sqinfo->name, seqlen, checksum);
- spacer = 11;
- numline = 1;
- strcpy(endstr, "\n"); /* this is insurance to help prevent misreads at eof */
- break;
-
- case SQFILE_STRIDER: /* ?? map ?*/
- fprintf(outf,"; ### from DNA Strider ;-)\n");
- fprintf(outf,"; DNA sequence %s, %d bases, %d checksum.\n;\n",
- sqinfo->name, seqlen, checksum);
- strcpy(endstr, "\n//");
- break;
-
- /* SRE: Don had Zuker default to Pearson, which is not
- intuitive or helpful, since Zuker's MFOLD can't read
- Pearson format. More useful to use kIG */
- case SQFILE_ZUKER:
- which_case = 1; /* MFOLD requires upper case. */
- /*FALLTHRU*/
- case SQFILE_IG:
- fprintf(outf,";%s %s\n",
- sqinfo->name,
- (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "");
- fprintf(outf,"%s\n", sqinfo->name);
- strcpy(endstr,"1"); /* == linear dna */
- break;
-
- case SQFILE_RAW: /* Raw: no header at all. */
- break;
-
- default :
- case SQFILE_FASTA:
- fprintf(outf,">%s %s\n", sqinfo->name,
- (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "");
- break;
- }
-
- if (which_case == 1) s2upper(seq);
- if (which_case == 2) s2lower(seq);
-
-
- width = MIN(width,100);
- for (i=0, l=0, ibase = 1, lines = 0; i < seqlen; ) {
- if (l1 < 0) l1 = 0;
- else if (l1 == 0) {
- if (numline) fprintf(outf,"%8d ",ibase);
- for (j=0; j<tab; j++) fputc(' ',outf);
- }
- if ((spacer != 0) && ((l+1) % spacer == 1))
- { s[l] = ' '; ss[l] = ' '; l++; }
- s[l] = seq[i];
- ss[l] = (sqinfo->flags & SQINFO_SS) ? sqinfo->ss[i] : '.';
- l++; i++;
- l1++; /* don't count spaces for width*/
- if (l1 == width || i == seqlen) {
- s[l] = ss[l] = '\0';
- l = 0; l1 = 0;
- if (dostruc)
- {
- fprintf(outf, "%s\n", s);
- if (numline) fprintf(outf," ");
- for (j=0; j<tab; j++) fputc(' ',outf);
- if (i == seqlen) fprintf(outf,"%s%s\n",ss,endstr);
- else fprintf(outf,"%s\n",ss);
- }
- else
- {
- if (i == seqlen) fprintf(outf,"%s%s\n",s,endstr);
- else fprintf(outf,"%s\n",s);
- }
- lines++;
- ibase = i+1;
- }
- }
- return lines;
-}
-
-
-/* Function: ReadMultipleRseqs()
- *
- * Purpose: Open a data file and
- * parse it into an array of rseqs (raw, unaligned
- * sequences).
- *
- * Caller is responsible for free'ing memory allocated
- * to ret_rseqs, ret_weights, and ret_names.
- *
- * Weights are currently only supported for MSF format.
- * Sequences read from all other formats will be assigned
- * weights of 1.0. If the caller isn't interested in
- * weights, it passes NULL as ret_weights.
- *
- * Returns 1 on success. Returns 0 on failure and sets
- * squid_errno to indicate the cause.
- */
-int
-ReadMultipleRseqs(char *seqfile,
- int fformat,
- char ***ret_rseqs,
- SQINFO **ret_sqinfo,
- int *ret_num)
-{
- SQINFO *sqinfo; /* array of sequence optional info */
- SQFILE *dbfp; /* open ptr for sequential access of file */
- char **rseqs; /* sequence array */
- int numalloced; /* num of seqs currently alloced for */
- int num;
-
-
- num = 0;
- numalloced = 16;
- rseqs = (char **) MallocOrDie (numalloced * sizeof(char *));
- sqinfo = (SQINFO *) MallocOrDie (numalloced * sizeof(SQINFO));
- if ((dbfp = SeqfileOpen(seqfile, fformat, NULL)) == NULL) return 0;
-
- while (ReadSeq(dbfp, dbfp->format, &rseqs[num], &(sqinfo[num])))
- {
- num++;
- if (num == numalloced) /* more seqs coming, alloc more room */
- {
- numalloced += 16;
- rseqs = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *));
- sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO));
- }
- }
- SeqfileClose(dbfp);
-
- *ret_rseqs = rseqs;
- *ret_sqinfo = sqinfo;
- *ret_num = num;
- return 1;
-}
-
-
-/* Function: String2SeqfileFormat()
- * Date: SRE, Sun Jun 27 15:25:54 1999 [TW 723 over Canadian Shield]
- *
- * Purpose: Convert a string (e.g. from command line option arg)
- * to a format code. Case insensitive. Return
- * MSAFILE_UNKNOWN/SQFILE_UNKNOWN if string is bad.
- * Uses codes defined in squid.h (unaligned formats) and
- * msa.h (aligned formats).
- *
- * Args: s - string to convert; e.g. "stockholm"
- *
- * Returns: format code; e.g. MSAFILE_STOCKHOLM.
- * Returns SQFILE_UNKNOWN (same as MSAFILE_UNKNOWN) if string is
- * not valid.
- */
-int
-String2SeqfileFormat(char *s)
-{
- char *s2;
- int code = SQFILE_UNKNOWN;
-
- if (s == NULL) return SQFILE_UNKNOWN;
- s2 = sre_strdup(s, -1);
- s2upper(s2);
-
- if (strcmp(s2, "FASTA") == 0) code = SQFILE_FASTA;
- else if (strcmp(s2, "GENBANK") == 0) code = SQFILE_GENBANK;
- else if (strcmp(s2, "EMBL") == 0) code = SQFILE_EMBL;
- else if (strcmp(s2, "GCG") == 0) code = SQFILE_GCG;
- else if (strcmp(s2, "GCGDATA") == 0) code = SQFILE_GCGDATA;
- else if (strcmp(s2, "RAW") == 0) code = SQFILE_RAW;
- else if (strcmp(s2, "IG") == 0) code = SQFILE_IG;
- else if (strcmp(s2, "STRIDER") == 0) code = SQFILE_STRIDER;
- else if (strcmp(s2, "IDRAW") == 0) code = SQFILE_IDRAW;
- else if (strcmp(s2, "ZUKER") == 0) code = SQFILE_ZUKER;
- else if (strcmp(s2, "PIR") == 0) code = SQFILE_PIR;
- else if (strcmp(s2, "SQUID") == 0) code = SQFILE_SQUID;
- else if (strcmp(s2, "STOCKHOLM") == 0) code = MSAFILE_STOCKHOLM;
- else if (strcmp(s2, "SELEX") == 0) code = MSAFILE_SELEX;
- else if (strcmp(s2, "MSF") == 0) code = MSAFILE_MSF;
- else if (strcmp(s2, "CLUSTAL") == 0) code = MSAFILE_CLUSTAL;
- else if (strcmp(s2, "A2M") == 0) code = MSAFILE_A2M;
- else if (strcmp(s2, "PHYLIP") == 0) code = MSAFILE_PHYLIP;
- else if (strcmp(s2, "EPS") == 0) code = MSAFILE_EPS;
-
- free(s2);
- return code;
-}
-char *
-SeqfileFormat2String(int code)
-{
- switch (code) {
- case SQFILE_UNKNOWN: return "unknown";
- case SQFILE_FASTA: return "FASTA";
- case SQFILE_GENBANK: return "Genbank";
- case SQFILE_EMBL: return "EMBL";
- case SQFILE_GCG: return "GCG";
- case SQFILE_GCGDATA: return "GCG data library";
- case SQFILE_RAW: return "raw";
- case SQFILE_IG: return "Intelligenetics";
- case SQFILE_STRIDER: return "MacStrider";
- case SQFILE_IDRAW: return "Idraw Postscript";
- case SQFILE_ZUKER: return "Zuker";
- case SQFILE_PIR: return "PIR";
- case SQFILE_SQUID: return "SQUID";
- case MSAFILE_STOCKHOLM: return "Stockholm";
- case MSAFILE_SELEX: return "SELEX";
- case MSAFILE_MSF: return "MSF";
- case MSAFILE_CLUSTAL: return "Clustal";
- case MSAFILE_A2M: return "a2m";
- case MSAFILE_PHYLIP: return "Phylip";
- case MSAFILE_EPS: return "EPS";
- default:
- Die("Bad code passed to MSAFormat2String()");
- }
- /*NOTREACHED*/
- return NULL;
-}
-
-
-/* Function: MSAToSqinfo()
- * Date: SRE, Tue Jul 20 14:36:56 1999 [St. Louis]
- *
- * Purpose: Take an MSA and generate a SQINFO array suitable
- * for use in annotating the unaligned sequences.
- * Return the array.
- *
- * Permanent temporary code. sqinfo was poorly designed.
- * it must eventually be replaced, but the odds
- * of this happening soon are nil, so I have to deal.
- *
- * Args: msa - the alignment
- *
- * Returns: ptr to allocated sqinfo array.
- * Freeing is ghastly: free in each individual sqinfo[i]
- * with FreeSequence(NULL, &(sqinfo[i])), then
- * free(sqinfo).
- */
-SQINFO *
-MSAToSqinfo(MSA *msa)
-{
- int idx;
- SQINFO *sqinfo;
-
- sqinfo = MallocOrDie(sizeof(SQINFO) * msa->nseq);
-
- for (idx = 0; idx < msa->nseq; idx++)
- {
- sqinfo[idx].flags = 0;
- SetSeqinfoString(&(sqinfo[idx]),
- msa->sqname[idx], SQINFO_NAME);
- SetSeqinfoString(&(sqinfo[idx]),
- MSAGetSeqAccession(msa, idx), SQINFO_ACC);
- SetSeqinfoString(&(sqinfo[idx]),
- MSAGetSeqDescription(msa, idx), SQINFO_DESC);
-
- if (msa->ss != NULL && msa->ss[idx] != NULL) {
- MakeDealignedString(msa->aseq[idx], msa->alen,
- msa->ss[idx], &(sqinfo[idx].ss));
- sqinfo[idx].flags |= SQINFO_SS;
- }
-
- if (msa->sa != NULL && msa->sa[idx] != NULL) {
- MakeDealignedString(msa->aseq[idx], msa->alen,
- msa->sa[idx], &(sqinfo[idx].sa));
- sqinfo[idx].flags |= SQINFO_SA;
- }
-
- sqinfo[idx].len = DealignedLength(msa->aseq[idx]);
- sqinfo[idx].flags |= SQINFO_LEN;
- }
- return sqinfo;
-}
-
-
-
-/* cc -o sqio_test -DA_QUIET_DAY -L. sqio.c -lsquid */
-#ifdef A_QUIET_DAY
-#include "ssi.h"
-int
-main(int argc, char **argv)
-{
- FILE *fp;
- char *filename;
- char *buf;
- int len;
- int mode = 3;
- SSIOFFSET off;
-
- filename = argv[1];
-
- if (mode == 1) {
- buf = malloc(sizeof(char) * 256);
- if ((fp = fopen(filename, "r")) == NULL)
- Die("open of %s failed", filename);
- while (fgets(buf, 255, fp) != NULL)
- ;
- fclose(fp);
- free(buf);
- } else if (mode == 2) {
- if ((fp = fopen(filename, "r")) == NULL)
- Die("open of %s failed", filename);
- buf = NULL; len = 0;
- while (sre_fgets(&buf, &len, fp) != NULL)
- SSIGetFilePosition(fp, SSI_OFFSET_I32, &off);
- fclose(fp);
- free(buf);
- } else if (mode == 3) {
- SQFILE *dbfp;
- SQINFO info;
-
- if ((dbfp = SeqfileOpen(filename, SQFILE_FASTA, NULL)) == NULL)
- Die("open of %s failed", filename);
- while (ReadSeq(dbfp, dbfp->format, &buf, &info)) {
- SSIGetFilePosition(dbfp->f, SSI_OFFSET_I32, &off);
- FreeSequence(buf, &info);
- }
- SeqfileClose(dbfp);
- }
-
-}
-
-
-#endif
diff --git a/squid/squid.h.in b/squid/squid.h.in
deleted file mode 100644
index bca39b5..0000000
--- a/squid/squid.h.in
+++ /dev/null
@@ -1,475 +0,0 @@
-/* @configure_input@ */
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQUIDH_INCLUDED
-#define SQUIDH_INCLUDED
-
-/* squid.h
- * Header file for my library of sequence functions.
- *
- * CVS $Id: squid.h.in,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <stdlib.h>
-#include <unistd.h> /* for sysconf() #define's */
-
-
-#if DEBUGLEVEL > 0
-#include <assert.h> /* for SQD_DASSERT1(), etc. */
-#endif
-
-/*****************************************************************
- * Integers of guaranteed size. (used for instance in gsi.c, gsi2.c)
- * These are set by the ./configure script; if they show up as FIXME,
- * they must be manually edited to appropriate type definitions. You
- * do need 64-bit integers in the current code; email me if this
- * prevents you from compiling SQUID and tell me your system (I don't
- * know of any systems that don't have 64-bit integers these days).
- *****************************************************************/
-typedef @SQD_UINT16@ sqd_uint16;
-typedef @SQD_UINT32@ sqd_uint32;
-typedef @SQD_UINT64@ sqd_uint64;
-
-#ifdef USE_HOST_BYTESWAP_FUNCTIONS
-#include <sys/types.h> /* only for ntohl() and friends. */
-#include <netinet/in.h> /* only for ntohl() and friends. */
-#define sre_ntoh16(x) ntohs(x);
-#define sre_ntoh32(x) ntohl(x);
-#define sre_hton16(x) htons(x);
-#define sre_hton32(x) htonl(x);
-#endif /* USE_HOST_BYTESWAP_FUNCTIONS */
-
-/* Library version info is made available as a global to
- * any interested program. These are defined in iupac.c
- * with the other globals.
- */
-extern char squid_version[]; /* version number */
-extern char squid_date[]; /* date of release */
-extern int squid_errno; /* error codes */
-
-
-
-/****************************************************
- * Error codes returned by squid library functions (squid_errno)
- ****************************************************/
-
-#define SQERR_OK 0 /* no error */
-#define SQERR_UNKNOWN 1 /* generic error, unidentified */
-#define SQERR_NODATA 2 /* unexpectedly NULL stream */
-#define SQERR_MEM 3 /* malloc or realloc failed */
-#define SQERR_NOFILE 4 /* file not found */
-#define SQERR_FORMAT 5 /* file format not recognized */
-#define SQERR_PARAMETER 6 /* bad parameter passed to func */
-#define SQERR_DIVZERO 7 /* error in sre_math.c */
-#define SQERR_INCOMPAT 8 /* incompatible parameters */
-#define SQERR_EOD 9 /* end-of-data (often normal) */
-
-/****************************************************
- * Single sequence information
- ****************************************************/
-#define SQINFO_NAMELEN 64
-#define SQINFO_DESCLEN 128
-
-struct seqinfo_s {
- int flags; /* what extra data are available */
- char name[SQINFO_NAMELEN];/* up to 63 characters of name */
- char id[SQINFO_NAMELEN]; /* up to 63 char of database identifier */
- char acc[SQINFO_NAMELEN]; /* up to 63 char of database accession # */
- char desc[SQINFO_DESCLEN];/* up to 127 char of description */
- int len; /* length of this seq */
- int start; /* (1..len) start position on source seq */
- int stop; /* (1..len) end position on source seq */
- int olen; /* original length of source seq */
- int type; /* kRNA, kDNA, kAmino, or kOther */
- char *ss; /* 0..len-1 secondary structure string */
- char *sa; /* 0..len-1 % side chain surface access. */
-};
-typedef struct seqinfo_s SQINFO;
-
-#define SQINFO_NAME (1 << 0)
-#define SQINFO_ID (1 << 1)
-#define SQINFO_ACC (1 << 2)
-#define SQINFO_DESC (1 << 3)
-#define SQINFO_START (1 << 4)
-#define SQINFO_STOP (1 << 5)
-#define SQINFO_LEN (1 << 6)
-#define SQINFO_TYPE (1 << 7)
-#define SQINFO_OLEN (1 << 8)
-#define SQINFO_SS (1 << 9)
-#define SQINFO_SA (1 << 10)
-
-
-/****************************************************
- * Sequence alphabet: see also iupac.c
- ****************************************************/
- /* IUPAC symbols defined globally in iupac.c */
-struct iupactype {
- char sym; /* character representation */
- char symcomp; /* complement (regular char */
- char code; /* my binary rep */
- char comp; /* binary encoded complement */
-};
-extern struct iupactype iupac[];
-#define IUPACSYMNUM 17
-
-extern char *stdcode1[]; /* 1-letter amino acid translation code */
-extern char *stdcode3[]; /* 3-letter amino acid translation code */
-extern float dnafq[]; /* nucleotide occurrence frequencies */
-extern float aafq[]; /* amino acid occurrence frequencies */
-extern char aa_alphabet[]; /* amino acid alphabet */
-extern int aa_index[]; /* convert 0..19 indices to 0..26 */
-
- /* valid symbols in IUPAC code */
-#define NUCLEOTIDES "ACGTUNRYMKSWHBVDacgtunrymkswhbvd"
-#define AMINO_ALPHABET "ACDEFGHIKLMNPQRSTVWY"
-#define DNA_ALPHABET "ACGT"
-#define RNA_ALPHABET "ACGU"
-#define WHITESPACE " \t\n"
-
-#define isgap(c) ((c) == ' ' || (c) == '.' || (c) == '_' || (c) == '-' || (c) == '~')
-
-
-/****************************************************
- * Sequence i/o: originally from Don Gilbert's readseq
- ****************************************************/
-#include "msa.h" /* for multiple sequence alignment support */
-
- /* buffer size for reading in lines from sequence files*/
-#define LINEBUFLEN 4096
-
-/* sequence types parsed by Seqtype() */
-/* note that these must match hmmAMINO and hmmNUCLEIC in HMMER */
-#define kOtherSeq 0 /* hmmNOTSETYET */
-#define kDNA 1
-#define kRNA 2 /* hmmNUCLEIC */
-#define kAmino 3 /* hmmAMINO */
-
-/* Unaligned sequence file formats recognized
- * Coexists with definitions of multiple alignment formats in msa.h:
- * >100 reserved for alignment formats
- * <100 reserved for unaligned formats
- * 0 reserved for unknown
- *
- * Some "legacy" formats are supported only when explicitly
- * requested; not autodetected by SeqfileFormat().
- *
- * DON'T REASSIGN THESE CODES. They're written into
- * GSI index files. You can use new ones, but reassigning
- * the sense of old ones will break GSI indices.
- * Alignment format codes were reassigned with the creation
- * of msa.c, but before Stockholm format, there were no
- * indexed alignment databases.
- */
-#define SQFILE_UNKNOWN 0 /* unknown format */
-#define SQFILE_IG 1 /* Intelligenetics (!) */
-#define SQFILE_GENBANK 2 /* GenBank flatfile */
- /* 3 was A2M. Now an alignment format */
-#define SQFILE_EMBL 4 /* EMBL or Swissprot flatfile */
-#define SQFILE_GCG 5 /* GCG single sequence files */
-#define SQFILE_STRIDER 6 /* MacStrider (!!) */
-#define SQFILE_FASTA 7 /* FASTA format: default */
-#define SQFILE_ZUKER 8 /* Zuker MFOLD format (legacy) */
-#define SQFILE_IDRAW 9 /* Idraw-style PostScript (legacy) */
- /* 10 was SELEX. Now alignment format */
- /* 11 was MSF. Now alignment format */
-#define SQFILE_PIR 12 /* PIR format */
-#define SQFILE_RAW 13 /* raw sequence */
-#define SQFILE_SQUID 14 /* my obsolete squid format */
- /* 15 was kXPearson, extended FASTA; withdrawn */
-#define SQFILE_GCGDATA 16 /* GCG data library file */
- /* 17 was Clustal. Now alignment format*/
-
-#define IsUnalignedFormat(fmt) ((fmt) && (fmt) < 100)
-
-#include "ssi.h"
-
-struct ReadSeqVars {
- FILE *f; /* open file pointer */
- char *fname; /* name of file; used for diagnostics */
- int linenumber; /* what line are we on in the file */
-
- char *buf; /* dynamically allocated sre_fgets() buffer */
- int buflen; /* allocation length for buf */
-
- int ssimode; /* SSI_OFFSET_I32 or SSI_OFFSET_I64 */
- SSIOFFSET ssioffset; /* disk offset to last line read into buf */
- SSIOFFSET r_off; /* offset to start of record */
- SSIOFFSET d_off; /* offset to start of sequence data */
-
- int rpl; /* residues per data line for this file; -1 if unset, 0 if invalid */
- int lastrpl; /* rpl on last line seen */
- int maxrpl; /* max rpl on any line of the file */
- int bpl; /* bytes per data line; -1 if unset, 0 if invalid */
- int lastbpl; /* bpl on last line seen */
- int maxbpl; /* max bpl on any line of the file */
-
- char *seq; /* growing sequence during parse */
- SQINFO *sqinfo; /* name, id, etc, gathered during parse */
- char *sp;
- int seqlen; /* current sequence length */
- int maxseq; /* current allocation length for seq */
-
- int format; /* format of seqfile we're reading. */
- int do_gzip; /* TRUE if f is a pipe from gzip -dc */
- int do_stdin; /* TRUE if f is stdin */
-
- /* An (important) hack for sequential access of multiple alignment files:
- * we read the whole alignment in,
- * and then copy it one sequence at a time into seq and sqinfo.
- * It is active if msa is non NULL.
- * msa->lastidx is reused/overloaded: used to keep track of what
- * seq we'll return next.
- * afp->format is the real format, while SQFILE->format is kMSA.
- * Because we keep it in the SQFILE structure,
- * ReadSeq() and friends are always reentrant for multiple seqfiles.
- */
- MSA *msa;
- MSAFILE *afp;
-};
-typedef struct ReadSeqVars SQFILE;
-
-
-/****************************************************
- * Cluster analysis and phylogenetic tree support
- ****************************************************/
-
-/* struct phylo_s - a phylogenetic tree
- *
- * For N sequences, there will generally be an array of 0..N-2
- * phylo_s structures representing the nodes of a tree.
- * [0] is the root. The indexes of left and
- * right children are somewhat confusing so be careful. The
- * indexes can have values of 0..2N-2. If they are 0..N-1, they
- * represent pointers to individual sequences. If they are
- * >= N, they represent pointers to a phylo_s structure
- * at (index - N).
- */
-struct phylo_s {
- int parent; /* index of parent, N..2N-2, or -1 for root */
- int left; /* index of one of the branches, 0..2N-2 */
- int right; /* index of other branch, 0..2N-2 */
- float diff; /* difference score between seqs */
- float lblen; /* left branch length */
- float rblen; /* right branch length */
- char *is_in; /* 0..N-1 flag array, 1 if seq included */
- int incnum; /* number of seqs included at this node */
-};
-
-
-/* Strategies for cluster analysis; cluster by mean distance,
- * minimum distance, or maximum distance.
- */
-enum clust_strategy { CLUSTER_MEAN, CLUSTER_MAX, CLUSTER_MIN };
-
-/****************************************************
- * Generic data structure support
- ****************************************************/
-
-/* a struct intstack_s implements a pushdown stack for storing
- * single integers.
- */
-struct intstack_s {
- int data;
- struct intstack_s *nxt;
-};
-
-/****************************************************
- * Binary nucleotide alphabet support
- ****************************************************/
-
-/* Binary encoding of the IUPAC code for nucleotides
- *
- * four-bit "word", permitting rapid degenerate matching
- * A C G T/U
- * 0 0 1 0
- */
-#define NTA 8
-#define NTC 4
-#define NTG 2
-#define NTT 1
-#define NTU 1
-#define NTN 15 /* A|C|G|T */
-#define NTR 10 /* A|G */
-#define NTY 5 /* C|T */
-#define NTM 12 /* A|C */
-#define NTK 3 /* G|T */
-#define NTS 6 /* C|G */
-#define NTW 9 /* A|T */
-#define NTH 13 /* A|C|T */
-#define NTB 7 /* C|G|T */
-#define NTV 14 /* A|C|G */
-#define NTD 11 /* A|G|T */
-#define NTGAP 16 /* GAP */
-#define NTEND 0 /* null string terminator */
-
-/* ntmatch(): bitwise comparison of two nuc's
- * note that it's sensitive to the order;
- * probe may be degenerate but target should not be
- */
-#define ntmatch(probe, target) ((probe & target) == target)
-
-/****************************************************
- * Support for a portable, flexible Getopt()
- ****************************************************/
-
-/* Structure: opt_s
- *
- * Structure for declaring options to a main().
- */
-struct opt_s {
- char *name; /* name of option, e.g. "--option1" or "-o" */
- int single; /* TRUE if a single letter option */
- int argtype; /* for typechecking, e.g. sqdARG_INT */
-};
- /* acceptable argtype's... */
-#define sqdARG_NONE 0 /* no argument */
-#define sqdARG_INT 1 /* something that atoi() can grok */
-#define sqdARG_FLOAT 2 /* something that atof() can grok */
-#define sqdARG_CHAR 3 /* require single character or digit */
-#define sqdARG_STRING 4 /* anything goes */
-
-/****************************************************
- * Support for convenient Perl-y regexp matching
- * See hsregexp.c for copyright notice: this code is derived
- * from Henry Spencer's freely distributed regexp library.
- ****************************************************/
-
-#define NSUBEXP 10
-typedef struct sqd_regexp {
- char *startp[NSUBEXP];
- char *endp[NSUBEXP];
- char regstart; /* Internal use only. */
- char reganch; /* Internal use only. */
- char *regmust; /* Internal use only. */
- int regmlen; /* Internal use only. */
- char program[1]; /* Unwarranted chumminess with compiler. */
-} sqd_regexp;
-
-/* Strparse() defines and manages these.
- * sqd_parse[0] contains the substring that matched the pattern.
- * sqd_parse[1-9] contain substrings matched with ()'s.
- */
-extern char *sqd_parse[10];
-
-/****************************************************
- * Portable detection of multiprocessor # of CPUs.
- * #include <unistd.h>
- * long foo = SQD_NPROC;
- * returns the number of available processors.
- * if foo == -1, we failed.
- ****************************************************/
-
-/* Our problem here is that POSIX apparently doesn't specify
- * a standard for how to get sysconf() to report the number of
- * processors on-line. _SC_NPROCESSORS_ONLN is specified
- * by SVR4.0MP. Thanks to W. Gish for help here.
- */
-#undef SQD_NPROC
-#if defined(_SC_NPROCESSORS_ONLN) /* Sun Solaris, Digital UNIX */
- #define SQD_NPROC sysconf(_SC_NPROCESSORS_ONLN)
-#elif defined(_SC_NPROC_ONLN) /* Silicon Graphics IRIX */
- #define SQD_NPROC sysconf(_SC_NPROC_ONLN)
-#elif defined(_SC_NPROCESSORS_CONF) /* _ONLN is favored over _CONF */
- #define SQD_NPROC sysconf(_SC_NPROCESSORS_CONF)
-#else /* some systems don't support getting ncpu via sysconf() */
- #define SQD_NPROC -1
-#endif
-
-/****************************************************
- * Three levels of debugging printf's and assert's
- * level 1: little impact on verbosity or performance
- * level 2: moderate impact
- * level 3: high impact
- * Example:
- * SQD_DPRINTF3(("Matrix row %d col %d = %f\n", i, j, val));
- * Note the double parentheses; these are important.
- ****************************************************/
-
-#ifndef DEBUGLEVEL
-#define DEBUGLEVEL 0
-#endif
-
-#if (DEBUGLEVEL >= 1)
-#define SQD_DPRINTF1(x) printf x
-#define SQD_DASSERT1(x) assert x
-#else
-#define SQD_DPRINTF1(x)
-#define SQD_DASSERT1(x)
-#endif
-#if (DEBUGLEVEL >= 2)
-#define SQD_DPRINTF2(x) printf x
-#define SQD_DASSERT2(x) assert x
-#else
-#define SQD_DPRINTF2(x)
-#define SQD_DASSERT2(x)
-#endif
-#if (DEBUGLEVEL >= 3)
-#define SQD_DPRINTF3(x) printf x
-#define SQD_DASSERT3(x) assert x
-#else
-#define SQD_DPRINTF3(x)
-#define SQD_DASSERT3(x)
-#endif
-
-/* PANIC is called for failures of Std C/POSIX functions,
- * instead of my own functions. Panic() calls perror() and exits
- * abnormally.
- */
-#define PANIC Panic(__FILE__, __LINE__)
-
-/* Malloc/realloc calls are wrapped
- */
-#define MallocOrDie(x) sre_malloc(__FILE__, __LINE__, (x))
-#define ReallocOrDie(x,y) sre_realloc(__FILE__, __LINE__, (x), (y))
-
-/****************************************************
- * Miscellaneous macros and defines
- ****************************************************/
-
-#define SQDCONST_E 2.71828182845904523536028747135
-#define SQDCONST_PI 3.14159265358979323846264338328
-
- /* must declare swapfoo to use SWAP() */
-#define SWAP(a,b) {swapfoo = b; b = a; a = swapfoo;}
-#define ScalarsEqual(a,b) (fabs((a)-(b)) < 1e-7)
-
-#ifndef MIN
-#define MIN(a,b) (((a)<(b))?(a):(b))
-#endif
-#ifndef MAX
-#define MAX(a,b) (((a)>(b))?(a):(b))
-#endif
-
-/* For convenience and (one hopes) clarity in boolean tests:
- */
-#ifndef TRUE
-#define TRUE 1
-#endif
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-/* Somewhere, there is a universe in which Unix vendors comply
- * with the ANSI C standard. Unfortunately, it is not ours:
- */
-#ifndef EXIT_SUCCESS
-#define EXIT_SUCCESS 0
-#endif
-#ifndef EXIT_FAILURE
-#define EXIT_FAILURE 1
-#endif
-
-#include "sqfuncs.h" /* squid function declarations */
-#include "sre_random.h" /* random number generator and samplers */
-#include "vectorops.h" /* vector operations */
-#endif /* SQUIDH_INCLUDED */
diff --git a/squid/squidconf.h.in b/squid/squidconf.h.in
deleted file mode 100644
index d15277c..0000000
--- a/squid/squidconf.h.in
+++ /dev/null
@@ -1,122 +0,0 @@
-/* @configure_input@ */
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SQUIDCONFH_INCLUDED
-#define SQUIDCONFH_INCLUDED
-
-/* squidconf.h
- * Captures #define's generated by the ./configure script;
- * this file must be the first header included in any squid file.
- */
-
-/*****************************************************************
- * Version info that's set by the ./configure script.
- *****************************************************************/
-
-#undef SQUID_VERSION
-#undef SQUID_DATE
-#undef SQUID_COPYRIGHT
-#undef SQUID_LICENSE
-
-/*****************************************************************
- * Optional feature enabling that's done by the ./configure script.
- *****************************************************************/
-
-/* --enable-debugging=x debugging diagnostics (development versions only)
- */
-#ifndef DEBUGLEVEL
-#undef DEBUGLEVEL
-#endif
-
-/* --enable-lfs Large File Summit (LFS) support for >2GB files
- */
-#undef _LARGEFILE_SOURCE
-#undef _LARGEFILE64_SOURCE
-#undef _FILE_OFFSET_BITS
-
-
-/* --enable-pvm Parallel Virtual Machine (PVM)
- */
-#undef SRE_ENABLE_PVM
-
-
-
-/*****************************************************************
- * Sizes of integer types.
- * various things are set by ./configure; the code
- * uses WORDS_BIGENDIAN and USE_HOST_BYTESWAP_FUNCTIONS.
- *****************************************************************/
-#undef WORDS_BIGENDIAN
-#define SIZEOF_UNSIGNED_SHORT 0
-#define SIZEOF_UNSIGNED_INT 0
-#define SIZEOF_UNSIGNED_LONG 0
-#define SIZEOF_UNSIGNED_LONG_LONG 0
-#undef HAVE_NTOHS /* if defined, system provides ntohs() */
-#undef HAVE_NTOHL /* if defined, system provides ntohl() */
-#undef HAVE_HTONS /* if defined, system provides htons() */
-#undef HAVE_HTONL /* if defined, system provides htonl() */
-#undef HAVE_STRTOUL
-#undef HAVE_STRTOULL
-#if defined HAVE_NTOHL && defined HAVE_NTOHS && defined HAVE_HTONS && defined HAVE_HTONL
-#define USE_HOST_BYTESWAP_FUNCTIONS 1
-#endif
-
-/* On some machines like Alphas, strtoull doesn't exist, but since
- * longs are 64 bits anyway and we never call strtoull except for
- * a sqd_uint64, strotul will work... probably.
- */
-#if SIZEOF_UNSIGNED_LONG == 8 && defined HAVE_STRTOUL && ! defined HAVE_STRTOULL
-#define strtoull strtoul
-#define HAVE_STRTOULL /* liar! */
-#endif
-
-/*****************************************************************
- * Can we support arithmetic 64-bit file offsets?
- * four possible models checked for:
- * 1. ftello(), fseeko() with 64-bit off_t
- * 2. ftello64(), fseeko64() with 64-bit off64_t
- * 3. ftell64(), fseek64() with 64-bit integer
- * 4. fgetpos(), fsetpos() with an fpos_t that happens to be a
- * 64-bit integer, even though ANSI says we're not supposed to know
- * anything about fpos_t's internals.
- * Based on what ./configure tells us about these, we set
- * HAS_64BIT_FILE_OFFSETS or not.
- *
- * In all cases, we also check that we HAVE_STRTOULL; ssi.c has to
- * make a call. HP/UX 11, for example, does not provide strtoull(),
- * despite being 64-bit.
- *****************************************************************/
-#undef HAVE_FTELLO
-#undef HAVE_FSEEKO
-#undef HAVE_FTELLO64
-#undef HAVE_FSEEKO64
-#undef HAVE_FTELL64
-#undef HAVE_FSEEK64
-#undef ARITHMETIC_FPOS_T
-#undef HAVE_STAT64
-#define SIZEOF_FPOS_T -1
-#define SIZEOF_OFF_T -1
-#define SIZEOF_OFF64_T -1
-
-#if defined HAVE_STRTOULL && defined HAVE_FTELLO && defined HAVE_FSEEKO && SIZEOF_OFF_T == 8
-#define HAS_64BIT_FILE_OFFSETS 1
-#elif defined HAVE_STRTOULL && defined HAVE_FTELLO64 && defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8
-#define HAS_64BIT_FILE_OFFSETS 1
-#elif defined HAVE_STRTOULL && defined HAVE_FTELL64 && defined HAVE_FSEEK64
-#define HAS_64BIT_FILE_OFFSETS 1
-#elif defined HAVE_STRTOULL && defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8
-#define HAS_64BIT_FILE_OFFSETS 1
-#else
-#undef HAS_64BIT_FILE_OFFSETS
-#endif
-
-
-#endif /* SQUIDCONFH_INCLUDED */
diff --git a/squid/squidcore.c b/squid/squidcore.c
deleted file mode 100644
index 955cd4e..0000000
--- a/squid/squidcore.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- ************************************************************/
-
-/* squidcore.c
- * SRE, Sun Jun 20 17:19:04 1999 [Graeme's kitchen]
- *
- * Core functions for SQUID library.
- * CVS $Id: squidcore.c,v 1.4 2003/09/27 20:31:15 eddy Exp $
- */
-
-#include "squidconf.h"
-#include "squid.h"
-
-#include <stdio.h>
-
-/* Function: SqdBanner()
- * Date: SRE, Sun Jun 20 17:19:41 1999 [Graeme's kitchen]
- *
- * Purpose: Print a package version and copyright banner.
- * Used by all the main()'s in squid.
- *
- * Expects to be able to pick up preprocessor #define's from squidconf.h:
- * symbol example
- * ------ --------------
- * SQUID_VERSION "2.0.42"
- * SQUID_DATE "April 1999"
- * SQUID_COPYRIGHT "Copyright (C) 1992-1999 Washington University School of Medicine"
- * SQUID_LICENSE "Freely distributed under the GNU General Public License (GPL)."
- *
- * This gives us a general mechanism to update release information
- * without changing multiple points in the code.
- *
- * Args: fp - where to print it
- * banner - one-line program description, e.g.:
- * "foobar - make bars from foo with elan"
- * Returns: (void)
- */
-void
-SqdBanner(FILE *fp, char *banner)
-{
- fprintf(fp, "%s\n", banner);
- fprintf(fp, "SQUID %s (%s)\n", SQUID_VERSION, SQUID_DATE);
- fprintf(fp, "%s\n", SQUID_COPYRIGHT);
- fprintf(fp, "%s\n", SQUID_LICENSE);
- fprintf(fp, "- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
-}
-
-
diff --git a/squid/sre_ctype.c b/squid/sre_ctype.c
deleted file mode 100644
index 918ebe4..0000000
--- a/squid/sre_ctype.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sre_ctype.c
- *
- * For portability. Some systems have functions tolower, toupper
- * as macros (for instance, MIPS M-2000 RISC/os!)
- *
- * CVS $Id: sre_ctype.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <ctype.h>
-#include "squid.h"
-
-int
-sre_tolower(int c)
-{
- if (isupper(c)) return tolower(c);
- else return c;
-}
-
-int
-sre_toupper(int c)
-{
- if (islower(c)) return toupper(c);
- else return c;
-}
-
diff --git a/squid/sre_math.c b/squid/sre_math.c
deleted file mode 100644
index 5c82ec1..0000000
--- a/squid/sre_math.c
+++ /dev/null
@@ -1,334 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sre_math.c
- *
- * Portability for and extensions to C math library.
- * RCS $Id: sre_math.c,v 1.13 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "squid.h"
-
-
-/* Function: Linefit()
- *
- * Purpose: Given points x[0..N-1] and y[0..N-1], fit to
- * a straight line y = a + bx.
- * a, b, and the linear correlation coefficient r
- * are filled in for return.
- *
- * Args: x - x values of data
- * y - y values of data
- * N - number of data points
- * ret_a - RETURN: intercept
- * ret_b - RETURN: slope
- * ret_r - RETURN: correlation coefficient
- *
- * Return: 1 on success, 0 on failure.
- */
-int
-Linefit(float *x, float *y, int N, float *ret_a, float *ret_b, float *ret_r)
-{
- float xavg, yavg;
- float sxx, syy, sxy;
- int i;
-
- /* Calculate averages, xavg and yavg
- */
- xavg = yavg = 0.0;
- for (i = 0; i < N; i++)
- {
- xavg += x[i];
- yavg += y[i];
- }
- xavg /= (float) N;
- yavg /= (float) N;
-
- sxx = syy = sxy = 0.0;
- for (i = 0; i < N; i++)
- {
- sxx += (x[i] - xavg) * (x[i] - xavg);
- syy += (y[i] - yavg) * (y[i] - xavg);
- sxy += (x[i] - xavg) * (y[i] - yavg);
- }
- *ret_b = sxy / sxx;
- *ret_a = yavg - xavg*(*ret_b);
- *ret_r = sxy / (sqrt(sxx) * sqrt(syy));
- return 1;
-}
-
-
-/* Function: WeightedLinefit()
- *
- * Purpose: Given points x[0..N-1] and y[0..N-1] with
- * variances (measurement errors) var[0..N-1],
- * fit to a straight line y = mx + b.
- *
- * Method: Algorithm from Numerical Recipes in C, [Press88].
- *
- * Return: (void)
- * ret_m contains slope; ret_b contains intercept
- */
-void
-WeightedLinefit(float *x, float *y, float *var, int N, float *ret_m, float *ret_b)
-{
- int i;
- double s;
- double sx, sy;
- double sxx, sxy;
- double delta;
- double m, b;
-
- s = sx = sy = sxx = sxy = 0.;
- for (i = 0; i < N; i++)
- {
- s += 1./var[i];
- sx += x[i] / var[i];
- sy += y[i] / var[i];
- sxx += x[i] * x[i] / var[i];
- sxy += x[i] * y[i] / var[i];
- }
-
- delta = s * sxx - (sx * sx);
- b = (sxx * sy - sx * sxy) / delta;
- m = (s * sxy - sx * sy) / delta;
-
- *ret_m = m;
- *ret_b = b;
-}
-
-
-/* Function: Gammln()
- *
- * Returns the natural log of the gamma function of x.
- * x is > 0.0.
- *
- * Adapted from a public domain implementation in the
- * NCBI core math library. Thanks to John Spouge and
- * the NCBI. (According to the NCBI, that's Dr. John
- * "Gammas Galore" Spouge to you, pal.)
- */
-double
-Gammln(double x)
-{
- int i;
- double xx, tx;
- double tmp, value;
- static double cof[11] = {
- 4.694580336184385e+04,
- -1.560605207784446e+05,
- 2.065049568014106e+05,
- -1.388934775095388e+05,
- 5.031796415085709e+04,
- -9.601592329182778e+03,
- 8.785855930895250e+02,
- -3.155153906098611e+01,
- 2.908143421162229e-01,
- -2.319827630494973e-04,
- 1.251639670050933e-10
- };
-
- /* Protect against x=0. We see this in Dirichlet code,
- * for terms alpha = 0. This is a severe hack but it is effective
- * and (we think?) safe. (due to GJM)
- */
- if (x <= 0.0) return 999999.;
-
- xx = x - 1.0;
- tx = tmp = xx + 11.0;
- value = 1.0;
- for (i = 10; i >= 0; i--) /* sum least significant terms first */
- {
- value += cof[i] / tmp;
- tmp -= 1.0;
- }
- value = log(value);
- tx += 0.5;
- value += 0.918938533 + (xx+0.5)*log(tx) - tx;
- return value;
-}
-
-
-/* 2D matrix operations
- */
-float **
-FMX2Alloc(int rows, int cols)
-{
- float **mx;
- int r;
-
- mx = (float **) MallocOrDie(sizeof(float *) * rows);
- mx[0] = (float *) MallocOrDie(sizeof(float) * rows * cols);
- for (r = 1; r < rows; r++)
- mx[r] = mx[0] + r*cols;
- return mx;
-}
-void
-FMX2Free(float **mx)
-{
- free(mx[0]);
- free(mx);
-}
-double **
-DMX2Alloc(int rows, int cols)
-{
- double **mx;
- int r;
-
- mx = (double **) MallocOrDie(sizeof(double *) * rows);
- mx[0] = (double *) MallocOrDie(sizeof(double) * rows * cols);
- for (r = 1; r < rows; r++)
- mx[r] = mx[0] + r*cols;
- return mx;
-}
-void
-DMX2Free(double **mx)
-{
- free(mx[0]);
- free(mx);
-}
-/* Function: FMX2Multiply()
- *
- * Purpose: Matrix multiplication.
- * Multiply an m x p matrix A by a p x n matrix B,
- * giving an m x n matrix C.
- * Matrix C must be a preallocated matrix of the right
- * size.
- */
-void
-FMX2Multiply(float **A, float **B, float **C, int m, int p, int n)
-{
- int i, j, k;
-
- for (i = 0; i < m; i++)
- for (j = 0; j < n; j++)
- {
- C[i][j] = 0.;
- for (k = 0; k < p; k++)
- C[i][j] += A[i][p] * B[p][j];
- }
-}
-
-
-/* Function: IncompleteGamma()
- *
- * Purpose: Returns 1 - P(a,x) where:
- * P(a,x) = \frac{1}{\Gamma(a)} \int_{0}^{x} t^{a-1} e^{-t} dt
- * = \frac{\gamma(a,x)}{\Gamma(a)}
- * = 1 - \frac{\Gamma(a,x)}{\Gamma(a)}
- *
- * Used in a chi-squared test: for a X^2 statistic x
- * with v degrees of freedom, call:
- * p = IncompleteGamma(v/2., x/2.)
- * to get the probability p that a chi-squared value
- * greater than x could be obtained by chance even for
- * a correct model. (i.e. p should be large, say
- * 0.95 or more).
- *
- * Method: Based on ideas from Numerical Recipes in C, Press et al.,
- * Cambridge University Press, 1988.
- *
- * Args: a - for instance, degrees of freedom / 2 [a > 0]
- * x - for instance, chi-squared statistic / 2 [x >= 0]
- *
- * Return: 1 - P(a,x).
- */
-double
-IncompleteGamma(double a, double x)
-{
- int iter; /* iteration counter */
-
- if (a <= 0.) Die("IncompleteGamma(): a must be > 0");
- if (x < 0.) Die("IncompleteGamma(): x must be >= 0");
-
- /* For x > a + 1 the following gives rapid convergence;
- * calculate 1 - P(a,x) = \frac{\Gamma(a,x)}{\Gamma(a)}:
- * use a continued fraction development for \Gamma(a,x).
- */
- if (x > a+1)
- {
- double oldp; /* previous value of p */
- double nu0, nu1; /* numerators for continued fraction calc */
- double de0, de1; /* denominators for continued fraction calc */
-
- nu0 = 0.; /* A_0 = 0 */
- de0 = 1.; /* B_0 = 1 */
- nu1 = 1.; /* A_1 = 1 */
- de1 = x; /* B_1 = x */
-
- oldp = nu1;
- for (iter = 1; iter < 100; iter++)
- {
- /* Continued fraction development:
- * set A_j = b_j A_j-1 + a_j A_j-2
- * B_j = b_j B_j-1 + a_j B_j-2
- * We start with A_2, B_2.
- */
- /* j = even: a_j = iter-a, b_j = 1 */
- /* A,B_j-2 are in nu0, de0; A,B_j-1 are in nu1,de1 */
- nu0 = nu1 + ((double)iter - a) * nu0;
- de0 = de1 + ((double)iter - a) * de0;
-
- /* j = odd: a_j = iter, b_j = x */
- /* A,B_j-2 are in nu1, de1; A,B_j-1 in nu0,de0 */
- nu1 = x * nu0 + (double) iter * nu1;
- de1 = x * de0 + (double) iter * de1;
-
- /* rescale */
- if (de1 != 0.)
- {
- nu0 /= de1;
- de0 /= de1;
- nu1 /= de1;
- de1 = 1.;
- }
- /* check for convergence */
- if (fabs((nu1-oldp)/nu1) < 1.e-7)
- return nu1 * exp(a * log(x) - x - Gammln(a));
-
- oldp = nu1;
- }
- Die("IncompleteGamma(): failed to converge using continued fraction approx");
- }
- else /* x <= a+1 */
- {
- double p; /* current sum */
- double val; /* current value used in sum */
-
- /* For x <= a+1 we use a convergent series instead:
- * P(a,x) = \frac{\gamma(a,x)}{\Gamma(a)},
- * where
- * \gamma(a,x) = e^{-x}x^a \sum_{n=0}{\infty} \frac{\Gamma{a}}{\Gamma{a+1+n}} x^n
- * which looks appalling but the sum is in fact rearrangeable to
- * a simple series without the \Gamma functions:
- * = \frac{1}{a} + \frac{x}{a(a+1)} + \frac{x^2}{a(a+1)(a+2)} ...
- * and it's obvious that this should converge nicely for x <= a+1.
- */
-
- p = val = 1. / a;
- for (iter = 1; iter < 10000; iter++)
- {
- val *= x / (a+(double)iter);
- p += val;
-
- if (fabs(val/p) < 1.e-7)
- return 1. - p * exp(a * log(x) - x - Gammln(a));
- }
- Die("IncompleteGamma(): failed to converge using series approx");
- }
- /*NOTREACHED*/
- return 0.;
-}
-
diff --git a/squid/sre_random.c b/squid/sre_random.c
deleted file mode 100644
index 81929db..0000000
--- a/squid/sre_random.c
+++ /dev/null
@@ -1,315 +0,0 @@
-/* sre_random.c
- *
- * Portable random number generator, and sampling routines.
- *
- * SRE, Tue Oct 1 15:24:11 2002 [St. Louis]
- * CVS $Id: sre_random.c,v 1.2 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include "sre_random.h"
-
-static int sre_randseed = 42; /* default seed for sre_random() */
-
-/* Function: sre_random()
- *
- * Purpose: Return a uniform deviate x, 0.0 <= x < 1.0.
- *
- * sre_randseed is a static variable, set
- * by sre_srandom(). When it is non-zero,
- * we re-seed.
- *
- * Implements L'Ecuyer's algorithm for combining output
- * of two linear congruential generators, plus a Bays-Durham
- * shuffle. This is essentially ran2() from Numerical Recipes,
- * sans their nonhelpful Rand/McNally-esque code obfuscation.
- *
- * Overflow errors are avoided by Schrage's algorithm:
- * az % m = a(z%q) - r(z/q) (+m if <0)
- * where q=m/a, r=m%a
- *
- * Requires that long int's have at least 32 bits.
- * This function uses statics and is NOT THREADSAFE.
- *
- * Reference: Press et al. Numerical Recipes in C, 1992.
- *
- * Reliable and portable, but slow. Benchmarks on wrasse,
- * using Linux gcc and Linux glibc rand() (see randspeed, in Testsuite):
- * sre_random(): 0.5 usec/call
- * rand(): 0.2 usec/call
- */
-double
-sre_random(void)
-{
- static long rnd1; /* random number from LCG1 */
- static long rnd2; /* random number from LCG2 */
- static long rnd; /* random number we return */
- static long tbl[64]; /* table for Bays/Durham shuffle */
- long x,y;
- int i;
-
- /* Magic numbers a1,m1, a2,m2 from L'Ecuyer, for 2 LCGs.
- * q,r derive from them (q=m/a, r=m%a) and are needed for Schrage's algorithm.
- */
- long a1 = 40014;
- long m1 = 2147483563;
- long q1 = 53668;
- long r1 = 12211;
-
- long a2 = 40692;
- long m2 = 2147483399;
- long q2 = 52774;
- long r2 = 3791;
-
- if (sre_randseed > 0)
- {
- rnd1 = sre_randseed;
- rnd2 = sre_randseed;
- /* Fill the table for Bays/Durham */
- for (i = 0; i < 64; i++) {
- x = a1*(rnd1%q1); /* LCG1 in action... */
- y = r1*(rnd1/q1);
- rnd1 = x-y;
- if (rnd1 < 0) rnd1 += m1;
-
- x = a2*(rnd2%q2); /* LCG2 in action... */
- y = r2*(rnd2/q2);
- rnd2 = x-y;
- if (rnd2 < 0) rnd2 += m2;
-
- tbl[i] = rnd1-rnd2;
- if (tbl[i] < 0) tbl[i] += m1;
- }
- sre_randseed = 0; /* drop the flag. */
- }/* end of initialization*/
-
-
- x = a1*(rnd1%q1); /* LCG1 in action... */
- y = r1*(rnd1/q1);
- rnd1 = x-y;
- if (rnd1 < 0) rnd1 += m1;
-
- x = a2*(rnd2%q2); /* LCG2 in action... */
- y = r2*(rnd2/q2);
- rnd2 = x-y;
- if (rnd2 < 0) rnd2 += m2;
-
- /* Choose our random number from the table... */
- i = (int) (((double) rnd / (double) m1) * 64.);
- rnd = tbl[i];
- /* and replace with a new number by L'Ecuyer. */
- tbl[i] = rnd1-rnd2;
- if (tbl[i] < 0) tbl[i] += m1;
-
- return ((double) rnd / (double) m1);
-}
-
-/* Function: sre_srandom()
- *
- * Purpose: Initialize with a random seed. Seed must be
- * >= 0 to work; we silently enforce this.
- */
-void
-sre_srandom(int seed)
-{
- if (seed < 0) seed = -1 * seed;
- if (seed == 0) seed = 42;
- sre_randseed = seed;
-}
-
-/* Function: sre_random_positive()
- * Date: SRE, Wed Apr 17 13:34:32 2002 [St. Louis]
- *
- * Purpose: Assure 0 < x < 1 (positive uniform deviate)
- */
-double
-sre_random_positive(void)
-{
- double x;
- do { x = sre_random(); } while (x == 0.0);
- return x;
-}
-
-/* Function: ExponentialRandom()
- * Date: SRE, Mon Sep 6 21:24:29 1999 [St. Louis]
- *
- * Purpose: Pick an exponentially distributed random variable
- * 0 > x >= infinity
- *
- * Args: (void)
- *
- * Returns: x
- */
-double
-ExponentialRandom(void)
-{
- double x;
-
- do x = sre_random(); while (x == 0.0);
- return -log(x);
-}
-
-/* Function: Gaussrandom()
- *
- * Pick a Gaussian-distributed random variable
- * with some mean and standard deviation, and
- * return it.
- *
- * Based on RANLIB.c public domain implementation.
- * Thanks to the authors, Barry W. Brown and James Lovato,
- * University of Texas, M.D. Anderson Cancer Center, Houston TX.
- * Their implementation is from Ahrens and Dieter, "Extensions
- * of Forsythe's method for random sampling from the normal
- * distribution", Math. Comput. 27:927-937 (1973).
- *
- * Impenetrability of the code is to be blamed on its FORTRAN/f2c lineage.
- *
- */
-double
-Gaussrandom(double mean, double stddev)
-{
- static double a[32] = {
- 0.0,3.917609E-2,7.841241E-2,0.11777,0.1573107,0.1970991,0.2372021,0.2776904, 0.3186394,0.36013,0.4022501,0.4450965,0.4887764,0.5334097,0.5791322,
- 0.626099,0.6744898,0.7245144,0.7764218,0.8305109,0.8871466,0.9467818,
- 1.00999,1.077516,1.150349,1.229859,1.318011,1.417797,1.534121,1.67594,
- 1.862732,2.153875
- };
- static double d[31] = {
- 0.0,0.0,0.0,0.0,0.0,0.2636843,0.2425085,0.2255674,0.2116342,0.1999243,
- 0.1899108,0.1812252,0.1736014,0.1668419,0.1607967,0.1553497,0.1504094,
- 0.1459026,0.14177,0.1379632,0.1344418,0.1311722,0.128126,0.1252791,
- 0.1226109,0.1201036,0.1177417,0.1155119,0.1134023,0.1114027,0.1095039
- };
- static double t[31] = {
- 7.673828E-4,2.30687E-3,3.860618E-3,5.438454E-3,7.0507E-3,8.708396E-3,
- 1.042357E-2,1.220953E-2,1.408125E-2,1.605579E-2,1.81529E-2,2.039573E-2,
- 2.281177E-2,2.543407E-2,2.830296E-2,3.146822E-2,3.499233E-2,3.895483E-2,
- 4.345878E-2,4.864035E-2,5.468334E-2,6.184222E-2,7.047983E-2,8.113195E-2,
- 9.462444E-2,0.1123001,0.136498,0.1716886,0.2276241,0.330498,0.5847031
- };
- static double h[31] = {
- 3.920617E-2,3.932705E-2,3.951E-2,3.975703E-2,4.007093E-2,4.045533E-2,
- 4.091481E-2,4.145507E-2,4.208311E-2,4.280748E-2,4.363863E-2,4.458932E-2,
- 4.567523E-2,4.691571E-2,4.833487E-2,4.996298E-2,5.183859E-2,5.401138E-2,
- 5.654656E-2,5.95313E-2,6.308489E-2,6.737503E-2,7.264544E-2,7.926471E-2,
- 8.781922E-2,9.930398E-2,0.11556,0.1404344,0.1836142,0.2790016,0.7010474
- };
- static long i;
- static double snorm,u,s,ustar,aa,w,y,tt;
-
- u = sre_random();
- s = 0.0;
- if(u > 0.5) s = 1.0;
- u += (u-s);
- u = 32.0*u;
- i = (long) (u);
- if(i == 32) i = 31;
- if(i == 0) goto S100;
- /*
- * START CENTER
- */
- ustar = u-(double)i;
- aa = *(a+i-1);
-S40:
- if(ustar <= *(t+i-1)) goto S60;
- w = (ustar-*(t+i-1))**(h+i-1);
-S50:
- /*
- * EXIT (BOTH CASES)
- */
- y = aa+w;
- snorm = y;
- if(s == 1.0) snorm = -y;
- return (stddev*snorm + mean);
-S60:
- /*
- * CENTER CONTINUED
- */
- u = sre_random();
- w = u*(*(a+i)-aa);
- tt = (0.5*w+aa)*w;
- goto S80;
-S70:
- tt = u;
- ustar = sre_random();
-S80:
- if(ustar > tt) goto S50;
- u = sre_random();
- if(ustar >= u) goto S70;
- ustar = sre_random();
- goto S40;
-S100:
- /*
- * START TAIL
- */
- i = 6;
- aa = *(a+31);
- goto S120;
-S110:
- aa += *(d+i-1);
- i += 1;
-S120:
- u += u;
- if(u < 1.0) goto S110;
- u -= 1.0;
-S140:
- w = u**(d+i-1);
- tt = (0.5*w+aa)*w;
- goto S160;
-S150:
- tt = u;
-S160:
- ustar = sre_random();
- if(ustar > tt) goto S50;
- u = sre_random();
- if(ustar >= u) goto S150;
- u = sre_random();
- goto S140;
-}
-
-
-/* Functions: DChoose(), FChoose()
- *
- * Purpose: Make a random choice from a normalized distribution.
- * DChoose() is for double-precision vectors;
- * FChoose() is for single-precision float vectors.
- * Returns the number of the choice.
- */
-int
-DChoose(double *p, int N)
-{
- double roll; /* random fraction */
- double sum; /* integrated prob */
- int i; /* counter over the probs */
-
- roll = sre_random();
- sum = 0.0;
- for (i = 0; i < N; i++)
- {
- sum += p[i];
- if (roll < sum) return i;
- }
- return (int) (sre_random() * N); /* bulletproof */
-}
-int
-FChoose(float *p, int N)
-{
- float roll; /* random fraction */
- float sum; /* integrated prob */
- int i; /* counter over the probs */
-
- roll = sre_random();
- sum = 0.0;
- for (i = 0; i < N; i++)
- {
- sum += p[i];
- if (roll < sum) return i;
- }
- return (int) (sre_random() * N); /* bulletproof */
-}
-
-
diff --git a/squid/sre_random.h b/squid/sre_random.h
deleted file mode 100644
index df8a4cd..0000000
--- a/squid/sre_random.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* sre_random.h
- * Header file for sre_random.c
- *
- * SRE, Tue Oct 1 15:24:29 2002
- * CVS $Id: sre_random.h,v 1.1 2002/10/09 14:26:09 eddy Exp $
- */
-
-extern double sre_random(void);
-extern void sre_srandom(int seed);
-extern double sre_random_positive(void);
-extern double ExponentialRandom(void);
-extern double Gaussrandom(double mean, double stddev);
-extern int DChoose(double *p, int N);
-extern int FChoose(float *p, int N);
-
-#define CHOOSE(a) ((int) (sre_random() * (a)))
-
-
diff --git a/squid/sre_string.c b/squid/sre_string.c
deleted file mode 100644
index 6845721..0000000
--- a/squid/sre_string.c
+++ /dev/null
@@ -1,527 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sre_string.c
- *
- * my library of extra string functions. Some for portability
- * across UNIXes
- *
- * CVS $Id: sre_string.c,v 1.14 2003/05/26 16:21:50 eddy Exp $
- */
-
-#include "squidconf.h"
-#include "squid.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <ctype.h>
-
-
-/* Function: Strdup()
- *
- * Purpose: Implementation of the common (but non-ANSI) function
- * strdup(). Robust against being passed a NULL pointer.
- *
- */
-char *
-Strdup(char *s)
-{
- char *new;
- if (s == NULL) return NULL;
- if ((new = (char *) malloc (strlen(s) +1)) == NULL) return NULL;
- strcpy(new, s);
- return new;
-}
-
-/* Function: StringChop()
- * Date: SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
- *
- * Purpose: Chop trailing whitespace off of a string.
- */
-void
-StringChop(char *s)
-{
- int i;
-
- i = strlen(s) - 1; /* set i at last char in string */
- while (i >= 0 && isspace((int) s[i])) i--; /* i now at last non-whitespace char, or -1 */
- s[i+1] = '\0';
-}
-
-int
-Strinsert(char *s1, /* string to insert a char into */
- char c, /* char to insert */
- int pos) /* position in s1 to insert c at */
-{
- char oldc;
- char *s;
-
- for (s = s1 + pos; c; s++)
- {
- /* swap current char for inserted one */
- oldc = *s; /* pick up current */
- *s = c; /* put down inserted one */
- c = oldc; /* old becomes next to insert */
- }
- *s = '\0';
-
- return 1;
-}
-
-
-int
-Strdelete(char *s1, /* string to delete a char from */
- int pos) /* position of char to delete 0..n-1 */
-{
- char *s;
-
- for (s = s1 + pos; *s; s++)
- *s = *(s + 1);
-
- return 1;
-}
-
-void
-s2lower(char *s)
-{
- for (; *s != '\0'; s++)
- *s = sre_tolower((int) *s);
-}
-
-void
-s2upper(char *s)
-{
- for (; *s != '\0'; s++)
- *s = sre_toupper((int) *s);
-}
-
-
-void *
-sre_malloc(char *file, int line, size_t size)
-{
- void *ptr;
-
- SQD_DPRINTF3(("MALLOC: %d bytes (file %s line %d)\n", size, file, line));
- if ((ptr = malloc (size)) == NULL)
- Die("malloc of %ld bytes failed: file %s line %d", size, file, line);
- return ptr;
-}
-
-void *
-sre_realloc(char *file, int line, void *p, size_t size)
-{
- void *ptr;
-
- if ((ptr = realloc(p, size)) == NULL)
- Die("realloc of %ld bytes failed: file %s line %d", size, file, line);
- return ptr;
-}
-
-
-
-/* Function: Free2DArray(), Free3DArray()
- * Date: SRE, Tue Jun 1 14:47:14 1999 [St. Louis]
- *
- * Purpose: Convenience functions for free'ing 2D
- * and 3D pointer arrays. Tolerates any of the
- * pointers being NULL, to allow "sparse"
- * arrays.
- *
- * Args: p - array to be freed
- * dim1 - n for first dimension
- * dim2 - n for second dimension
- *
- * e.g. a 2d array is indexed p[0..dim1-1][]
- * a 3D array is indexed p[0..dim1-1][0..dim2-1][]
- *
- * Returns: void
- *
- * Diagnostics: (void)
- * "never fails"
- */
-void
-Free2DArray(void **p, int dim1)
-{
- int i;
-
- if (p != NULL) {
- for (i = 0; i < dim1; i++)
- if (p[i] != NULL) free(p[i]);
- free(p);
- }
-}
-void
-Free3DArray(void ***p, int dim1, int dim2)
-{
- int i, j;
-
- if (p != NULL) {
- for (i = 0; i < dim1; i++)
- if (p[i] != NULL) {
- for (j = 0; j < dim2; j++)
- if (p[i][j] != NULL) free(p[i][j]);
- free(p[i]);
- }
- free(p);
- }
-}
-
-
-/* Function: RandomSequence()
- *
- * Purpose: Generate an iid symbol sequence according
- * to some alphabet, alphabet_size, probability
- * distribution, and length. Return the
- * sequence.
- *
- * Args: alphabet - e.g. "ACGT"
- * p - probability distribution [0..n-1]
- * n - number of symbols in alphabet
- * len - length of generated sequence
- *
- * Return: ptr to random sequence, or NULL on failure.
- */
-char *
-RandomSequence(char *alphabet, float *p, int n, int len)
-{
- char *s;
- int x;
-
- s = (char *) MallocOrDie (sizeof(char) * (len+1));
- for (x = 0; x < len; x++)
- s[x] = alphabet[FChoose(p,n)];
- s[x] = '\0';
- return s;
-}
-
-/* Function: sre_fgets()
- * Date: SRE, Thu May 13 10:56:28 1999 [St. Louis]
- *
- * Purpose: Dynamic allocation version of fgets(),
- * capable of reading unlimited line lengths.
- *
- * Args: buf - ptr to a string (may be reallocated)
- * n - ptr to current allocated length of buf,
- * (may be changed)
- * fp - open file ptr for reading
- *
- * Before the first call to sre_fgets(),
- * buf should be initialized to NULL and n to 0.
- * They're a linked pair, so don't muck with the
- * allocation of buf or the value of n while
- * you're still doing sre_fgets() calls with them.
- *
- * Returns: ptr to the buffer on success.
- * NULL on EOF (buf isn't to be used in this case)
- * sre_fgets() *always* results in an allocation
- * in buf.
- *
- * The reason to have it return a ptr to buf
- * is that it makes wrapper macros easy; see
- * MSAFileGetLine() for an example.
- *
- * Example: char *buf;
- * int n;
- * FILE *fp;
- *
- * fp = fopen("my_file", "r");
- * buf = NULL;
- * n = 0;
- * while (sre_fgets(&buf, &n, fp) != NULL)
- * {
- * do stuff with buf;
- * }
- */
-char *
-sre_fgets(char **buf, int *n, FILE *fp)
-{
- char *s;
- int len;
- int pos;
-
- if (*n == 0)
- {
- *buf = MallocOrDie(sizeof(char) * 128);
- *n = 128;
- }
-
- /* Simple case 1. We're sitting at EOF, or there's an error.
- * fgets() returns NULL, so we return NULL.
- */
- if (fgets(*buf, *n, fp) == NULL) return NULL;
-
- /* Simple case 2. fgets() got a string, and it reached EOF.
- * return success status, so caller can use
- * the last line; on the next call we'll
- * return the 0 for the EOF.
- */
- if (feof(fp)) return *buf;
-
- /* Simple case 3. We got a complete string, with \n,
- * and don't need to extend the buffer.
- */
- len = strlen(*buf);
- if ((*buf)[len-1] == '\n') return *buf;
-
- /* The case we're waiting for. We have an incomplete string,
- * and we have to extend the buffer one or more times. Make
- * sure we overwrite the previous fgets's \0 (hence +(n-1)
- * in first step, rather than 128, and reads of 129, not 128).
- */
- pos = (*n)-1;
- while (1) {
- *n += 128;
- *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
- s = *buf + pos;
- if (fgets(s, 129, fp) == NULL) return *buf;
- len = strlen(s);
- if (s[len-1] == '\n') return *buf;
- pos += 128;
- }
- /*NOTREACHED*/
-}
-
-/* Function: sre_strcat()
- * Date: SRE, Thu May 13 09:36:32 1999 [St. Louis]
- *
- * Purpose: Dynamic memory version of strcat().
- * appends src to the string that dest points to,
- * extending allocation for dest if necessary.
- *
- * One timing experiment (100 successive appends of
- * 1-255 char) shows sre_strcat() has about a 20%
- * overhead relative to strcat(). However, if optional
- * length info is passed, sre_strcat() is about 30%
- * faster than strcat().
- *
- * Args: dest - ptr to string (char **), '\0' terminated
- * ldest - length of dest, if known; or -1 if length unknown.
- * src - string to append to dest, '\0' terminated
- * lsrc - length of src, if known; or -1 if length unknown.
- *
- * dest may be NULL, in which case this is
- * the equivalent of dest = Strdup(src).
- *
- * src may also be NULL, in which case
- * dest is unmodified (but why would you want to pass
- * a NULL src?)
- *
- * if both dest and src are NULL, dest is
- * unmodified; it stays NULL.
- *
- * the length parameters are optional. If a -1
- * is passed, sre_strcat() will call strlen() to
- * determine the length itself. Passing length
- * info saves the strlen() calls and can speed things
- * up if lots of successive appends need to be done.
- *
- * Returns: new length of dest (>=0 on success);
- * dest is (probably) reallocated, and modified
- * to a longer string, '\0' terminated.
- */
-int
-sre_strcat(char **dest, int ldest, char *src, int lsrc)
-{
- int len1, len2;
-
- if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest));
- else len1 = ldest;
-
- if (lsrc < 0) len2 = (( src == NULL) ? 0 : strlen(src));
- else len2 = lsrc;
-
- if (len2 == 0) return len1;
-
- if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1));
- else *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1));
-
- memcpy((*dest)+len1, src, len2+1);
- return len1+len2;
-}
-
-/* Function: sre_strtok()
- * Date: SRE, Wed May 19 16:30:20 1999 [St. Louis]
- *
- * Purpose: Thread-safe version of strtok().
- *
- * Returns ptr to next token in a string: skips
- * until it reaches a character that is not in the delim
- * string, and sets beginning of token. Skips to
- * next delim character (or '\0') to set the end; replaces that
- * character with '\0'.
- * If there's still more string left, sets s to point to next
- * character after the '\0' that was written, so successive
- * calls extract tokens in succession. If there was no string
- * left, s points at the terminal '\0'.
- *
- * If no token is found, returns NULL.
- *
- * Also returns the length of the token, which
- * may save us a strlen() call in some applications.
- *
- * Limitations:
- * *s can't be a constant string, since we write to it.
- *
- * Example:
- * char *tok;
- * int len;
- * char *s;
- * char buf[50] = "This is a sentence.";
- *
- * s = buf;
- * tok = sre_strtok(&s, " ", &len);
- * tok is "This"; s is "is a sentence."; len is 4.
- * tok = sre_strtok(&s, " ", &len);
- * tok is "is"; s is " a sentence."; len is 2.
- * tok = sre_strtok(&s, " ", &len);
- * tok is "a"; s is "sentence."; len is 1.
- * tok = sre_strtok(&s, " ", &len);
- * tok is "sentence."; s is "\0"; len is 9.
- * tok = sre_strtok(&s, " ", &len);
- * tok is NULL; s is "\0", len is undefined.
- *
- * Args: s - a tmp, modifiable ptr to string
- * delim - characters that delimits tokens
- * len - RETURN: length of token; pass NULL if not wanted
- *
- * Returns: ptr to next token, or NULL if there aren't any.
- */
-char *
-sre_strtok(char **s, char *delim, int *len)
-{
- char *begin, *end;
- int n;
-
- begin = *s;
- begin += strspn(begin, delim);
- if (! *begin) return NULL;
-
- n = strcspn(begin, delim);
- end = begin + n;
- if (*end == '\0') { *s = end;}
- else {
- *end = '\0';
- *s = end+1;
- }
-
- if (len != NULL) *len = n;
- return begin;
-}
-
-
-
-/* Function: sre_strdup()
- * Date: SRE, Wed May 19 17:57:28 1999 [St. Louis]
- *
- * Purpose: A version of the common but non-ANSI strdup()
- * function. Can pass len, if known, to save a
- * strlen() call.
- *
- * Args: s - string to duplicate
- * n - length of string, if known; -1 if unknown.
- *
- * Returns: allocated copy of string.
- * NULL on failure.
- */
-char *
-sre_strdup(char *s, int n)
-{
- char *new;
-
- if (s == NULL) return NULL;
- if (n < 0) n = strlen(s);
- new = MallocOrDie (sizeof(char) * (n+1));
- strcpy(new, s);
- return new;
-}
-
-
-/* Function: sre_strncpy()
- * Date: SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
- *
- * Purpose: a strncpy() that makes sure it adds a trailing \0.
- *
- * Args: s1 - string to copy to (allocated n+1 or larger)
- * s2 - string to copy from
- * n - number of chars to copy
- *
- * Returns: s1.
- * Done only for consistency with strncpy(). Not clear
- * why it's useful for a strncpy() to return s1.
- */
-char *
-sre_strncpy(char *s1, char *s2, int n)
-{
- strncpy(s1,s2,n);
- s1[n] = '\0';
- return s1;
-}
-
-/* Function: IsBlankline()
- * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis]
- *
- * Purpose: Returns TRUE if string consists solely of whitespace.
- *
- * Args: s - string to check
- */
-int
-IsBlankline(char *s)
-{
- for (; *s != '\0'; s++)
- if (! isspace((int) *s)) return FALSE;
- return TRUE;
-}
-
-
-
-#ifdef CUBS_WIN
-/* A timing test for sre_strcat()
- * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm
- * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
- * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
- * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
- */
-int main(void)
-{
- float p[4] = {0.25, 0.25, 0.25, 0.25};
- int buflen;
- int len;
- int nappends;
- int nstrings;
- char *s1 = NULL;
- char *s2;
- int i;
-
- nappends = 100;
- nstrings = 1000;
- while (nstrings--)
- {
- /* s1 = malloc(sizeof(char) * (255*nappends+1));
- s1[0] = '\0';
- */
-
- s1 = NULL;
- len = 0;
- for (i = 0; i < nappends; i++)
- {
- buflen = CHOOSE(255) + 1;
- s2 = RandomSequence("ACGT", p, 4, buflen);
-
- /* strcat(s1,s2); */
- if ((len = sre_strcat(&s1, len, s2, buflen)) < 0) exit(1);
- free(s2);
- }
- free(s1);
- }
- exit(0);
-}
-#endif /*CUBS_WIN*/
diff --git a/squid/sreformat_main.c b/squid/sreformat_main.c
deleted file mode 100644
index c911add..0000000
--- a/squid/sreformat_main.c
+++ /dev/null
@@ -1,260 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* sreformat_main.c
- * Mon Sep 13 13:06:51 1993
- *
- * sreformat - reformat sequence files.
- * renamed sreformat from reformat, Tue Jun 30 10:53:38 1998
- *
- * CVS $Id: sreformat_main.c,v 1.19 2003/04/14 16:00:16 eddy Exp $
- */
-
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "sreformat - convert between sequence formats";
-
-static char usage[] = "\
-Usage: sreformat [-options] <format> <seqfile>\n\
- Output format choices: Unaligned Aligned\n\
- ----------- -------\n\
- fasta stockholm\n\
- embl msf\n\
- genbank a2m\n\
- gcg phylip\n\
- gcgdata clustal\n\
- pir selex\n\
- raw eps\n\n\
- Available options are:\n\
- -h : help; print brief help on version and usage\n\
- -d : force DNA alphabet for nucleic acid sequence\n\
- -r : force RNA alphabet for nucleic acid sequence\n\
- -l : force lower case\n\
- -u : force upper case\n\
- -x : convert non-IUPAC chars in DNA to N's for IUPAC/BLAST compatibility\n\
-";
-
-static char experts[] = "\
- Expert options:\n\
- --informat <s>: input sequence file is in format <s>\n\
- --mingap : remove columns containing all gaps (seqfile=alignment)\n\
- --nogap : remove columns containing any gaps (seqfile=alignment)\n\
- --pfam : modify Stockholm format output to be in PFAM style (1 line/seq)\n\
- --sam : try to convert gaps to SAM style (seqfile=alignment)\n\
- --samfrac <x> : convert to SAM convention; cols w/ gapfrac > x are inserts\n\
- --gapsym <c> : convert all gaps to character '<c>'\n\
-";
-
-static struct opt_s OPTIONS[] = {
- { "-d", TRUE, sqdARG_NONE },
- { "-h", TRUE, sqdARG_NONE },
- { "-l", TRUE, sqdARG_NONE },
- { "-r", TRUE, sqdARG_NONE },
- { "-u", TRUE, sqdARG_NONE },
- { "-x", TRUE, sqdARG_NONE },
- { "--gapsym", FALSE, sqdARG_CHAR },
- { "--informat",FALSE, sqdARG_STRING },
- { "--mingap", FALSE, sqdARG_NONE },
- { "--nogap", FALSE, sqdARG_NONE },
- { "--pfam", FALSE, sqdARG_NONE },
- { "--sam", FALSE, sqdARG_NONE },
- { "--samfrac", FALSE, sqdARG_FLOAT },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-int
-main(int argc, char **argv)
-{
- char *seqfile; /* name of sequence file */
- char *format;
- SQFILE *dbfp; /* open sequence file */
- int fmt; /* format of seqfile */
- int outfmt; /* output format */
- char *seq; /* sequence */
- SQINFO sqinfo;
- int i;
-
- int force_rna; /* TRUE to force RNA alphabet */
- int force_dna; /* TRUE to force DNA alphabet */
- int force_lower; /* TRUE to force lower case */
- int force_upper; /* TRUE to force upper case */
- int x_is_bad; /* TRUE to convert X to N */
- int do_mingap; /* TRUE to remove columns containing all gaps */
- int do_nogap; /* TRUE to remove columns containing any gaps */
- int do_pfam; /* TRUE to make SELEX -> PFAM */
- int samize; /* TRUE to SAMize an A2M conversion */
- float samfrac; /* -1, or gap fraction for a SAM conversion */
- int expect_alignment; /* TRUE to expect an input alignment to convert */
- char gapsym; /* 0 if unset; else = character to use for gaps */
-
- char *optname; /* name of option found by Getopt() */
- char *optarg; /* argument found by Getopt() */
- int optind; /* index in argv[] */
-
- /***********************************************
- * Parse command line
- ***********************************************/
-
- force_rna = FALSE;
- force_dna = FALSE;
- force_upper = FALSE;
- force_lower = FALSE;
- x_is_bad = FALSE;
- do_mingap = FALSE;
- do_nogap = FALSE;
- do_pfam = FALSE;
- samize = FALSE;
- samfrac = -1.0;
- fmt = SQFILE_UNKNOWN;
- expect_alignment = FALSE;
- gapsym = 0;
-
- while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
- &optind, &optname, &optarg)) {
- if (strcmp(optname, "-a") == 0) expect_alignment= TRUE;
- else if (strcmp(optname, "-d") == 0) force_dna = TRUE;
- else if (strcmp(optname, "-l") == 0) force_lower = TRUE;
- else if (strcmp(optname, "-r") == 0) force_rna = TRUE;
- else if (strcmp(optname, "-u") == 0) force_upper = TRUE;
- else if (strcmp(optname, "-x") == 0) x_is_bad = TRUE;
- else if (strcmp(optname, "--gapsym") == 0) gapsym = *optarg;
- else if (strcmp(optname, "--mingap") == 0) do_mingap = TRUE;
- else if (strcmp(optname, "--nogap") == 0) do_nogap = TRUE;
- else if (strcmp(optname, "--pfam") == 0) do_pfam = TRUE;
- else if (strcmp(optname, "--sam") == 0) samize = TRUE;
- else if (strcmp(optname, "--samfrac") == 0) samfrac = atof(optarg);
- else if (strcmp(optname, "--informat") == 0) {
- fmt = String2SeqfileFormat(optarg);
- if (fmt == SQFILE_UNKNOWN)
- Die("unrecognized sequence file format \"%s\"", optarg);
- }
- else if (strcmp(optname, "-h") == 0) {
- SqdBanner(stdout, banner);
- puts(usage);
- puts(experts);
- exit(EXIT_SUCCESS);
- }
- }
-
- if (argc - optind != 2)
- Die("%s\n", usage);
- if (force_lower && force_upper)
- Die("Can't force both upper case and lower case. Stop trying to confuse me.\n%s",
- usage);
- if (force_rna && force_dna)
- Die("Can't force both RNA and DNA. Stop trying to find bugs. You'll be sorry.\n%s",
- usage);
-
- format = argv[optind]; optind++;
- seqfile = argv[optind]; optind++;
-
- /* Try to work around inability to autodetect from a pipe or .gz:
- * assume FASTA format
- */
- if (fmt == SQFILE_UNKNOWN &&
- (Strparse("^.*\\.gz$", seqfile, 0) || strcmp(seqfile, "-") == 0))
- fmt = SQFILE_FASTA;
-
- /***********************************************
- * Figure out what format we're supposed to write
- ***********************************************/
-
- if ((outfmt = String2SeqfileFormat(format)) == SQFILE_UNKNOWN)
- Die("Unknown output format %s\n%s", format, usage);
-
- /***********************************************
- * Reformat the file, printing to stdout.
- ***********************************************/
-
- /* If the output format is an alignment, then the input format
- * has to be an alignment.
- */
- if (IsAlignmentFormat(outfmt))
- {
- MSAFILE *afp;
- MSA *msa;
-
- if ((afp = MSAFileOpen(seqfile, fmt, NULL)) == NULL)
- Die("Alignment file %s could not be opened for reading", seqfile);
-
- while ((msa = MSAFileRead(afp)) != NULL)
- {
- /* If asked, convert upper/lower convention and
- * gap character conventions now
- */
- if (do_mingap) MSAMingap(msa);
- if (do_nogap) MSANogap(msa);
- if (gapsym) AlignmentHomogenousGapsym(msa->aseq, msa->nseq, msa->alen, gapsym);
- if (samize) SAMizeAlignment(msa->aseq, msa->nseq, msa->alen);
- if (samfrac >= 0) SAMizeAlignmentByGapFrac(msa->aseq, msa->nseq, msa->alen, samfrac);
-
- for (i = 0; i < msa->nseq; i++)
- {
- if (force_dna) ToDNA(msa->aseq[i]);
- if (force_rna) ToRNA(msa->aseq[i]);
- if (x_is_bad) ToIUPAC(msa->aseq[i], TRUE);
- if (force_lower) s2lower(msa->aseq[i]);
- if (force_upper) s2upper(msa->aseq[i]);
- }
-
- /* This code block can be replaced with a
- * MSAFileWrite() call someday... SRE Sun Apr 22 19:17:19 2001
- */
- switch (outfmt) {
- case MSAFILE_A2M: WriteA2M(stdout, msa); break;
- case MSAFILE_CLUSTAL: WriteClustal(stdout, msa); break;
- case MSAFILE_MSF: WriteMSF(stdout, msa); break;
- case MSAFILE_PHYLIP: WritePhylip(stdout, msa); break;
- case MSAFILE_SELEX:
- if (do_pfam) WriteSELEXOneBlock(stdout, msa);
- else WriteSELEX(stdout, msa);
- break;
- case MSAFILE_EPS: EPSWriteSmallMSA(stdout, msa); break;
- case MSAFILE_STOCKHOLM:
- if (do_pfam) WriteStockholmOneBlock(stdout, msa);
- else WriteStockholm(stdout, msa);
- break;
- default:
- Die("can't write. no such alignment format %d\n", outfmt);
- }
-
- MSAFree(msa);
- }
- MSAFileClose(afp);
- }
- else
- {
- if ((dbfp = SeqfileOpen(seqfile, fmt, NULL)) == NULL)
- Die("Failed to open sequence file %s for reading", seqfile);
-
- while (ReadSeq(dbfp, fmt, &seq, &sqinfo))
- {
- if (force_dna) ToDNA(seq);
- if (force_rna) ToRNA(seq);
- if (x_is_bad) ToIUPAC(seq, FALSE);
- if (force_lower) s2lower(seq);
- if (force_upper) s2upper(seq);
-
- WriteSeq(stdout, outfmt, seq, &sqinfo);
- FreeSequence(seq, &sqinfo);
- }
- SeqfileClose(dbfp);
- }
-
- return 0;
-}
-
diff --git a/squid/ssi.c b/squid/ssi.c
deleted file mode 100644
index 1073011..0000000
--- a/squid/ssi.c
+++ /dev/null
@@ -1,1530 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include "squid.h"
-#include "ssi.h"
-
-static sqd_uint32 v20magic = 0xf3f3e9b1; /* SSI 1.0: "ssi1" + 0x80808080 */
-static sqd_uint32 v20swap = 0xb1e9f3f3; /* byteswapped */
-
-static int read_i16(FILE *fp, sqd_uint16 *ret_result);
-static int read_i32(FILE *fp, sqd_uint32 *ret_result);
-static int read_i64(FILE *fp, sqd_uint64 *ret_result);
-static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset);
-static int write_i16(FILE *fp, sqd_uint16 n);
-static int write_i32(FILE *fp, sqd_uint32 n);
-static int write_i64(FILE *fp, sqd_uint64 n);
-static int write_offset(FILE *fp, SSIOFFSET *offset);
-static int binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base,
- sqd_uint32 recsize, sqd_uint32 maxidx);
-static int indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len,
- sqd_uint32 n);
-static void clear_ssifile(SSIFILE *sfp);
-static sqd_uint64 current_index_size(SSIINDEX *g);
-static int activate_external_sort(SSIINDEX *g);
-static int load_indexfile(SSIFILE *sfp);
-static int parse_pkey_info(char *buf, char mode, struct ssipkey_s *pkey);
-static int parse_skey_info(char *buf, struct ssiskey_s *skey);
-
-/* Function: SSIOpen()
- * Date: SRE, Sun Dec 31 12:40:03 2000 [St. Louis]
- *
- * Purpose: Opens the SSI index file {filename} and returns
- * a SSIFILE * stream thru {ret_sfp}.
- * The caller must eventually close this stream using
- * SSIClose(). More than one index file can be open
- * at once.
- *
- * Args: filename - full path to a SSI index file
- *
- * Returns: Returns 0 on success, nonzero on failure.
- */
-int
-SSIOpen(char *filename, SSIFILE **ret_sfp)
-{
- SSIFILE *sfp = NULL;
- int status;
- if ((sfp = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC;
- if ((sfp->fp = fopen(filename, "rb")) == NULL) {
- free(sfp);
- return SSI_ERR_NOFILE;
- }
- status = load_indexfile(sfp);
- *ret_sfp = sfp;
- return status;
-}
-/* load_indexfile(): given a SSIFILE structure with an open and positioned
- * stream (fp) -- but no other data loaded -- read the next SSIFILE
- * in from disk. We use this routine without its SSIOpen() wrapper
- * as part of the external mergesort when creating large indices.
- */
-static int
-load_indexfile(SSIFILE *sfp)
-{
- sqd_uint32 magic;
- sqd_uint16 i; /* counter over files */
- int status; /* overall return status if an error is thrown */
-
- status = SSI_ERR_BADFORMAT; /* default: almost every kind of error is a bad format error */
-
- sfp->filename = NULL;
- sfp->fileformat = NULL;
- sfp->fileflags = NULL;
- sfp->bpl = NULL;
- sfp->rpl = NULL;
- sfp->nfiles = 0;
- if (! read_i32(sfp->fp, &magic)) {status = SSI_ERR_BADMAGIC; goto FAILURE; }
- if (magic != v20magic && magic != v20swap) {status = SSI_ERR_BADMAGIC; goto FAILURE; }
- if (! read_i32(sfp->fp, &(sfp->flags))) goto FAILURE;
-
- /* If we have 64-bit offsets, make sure we can deal with them.
- */
-#ifndef HAS_64BIT_FILE_OFFSETS
- if ((sfp->flags & SSI_USE64_INDEX) ||
- (sfp->flags & SSI_USE64))
- { status = SSI_ERR_NO64BIT; goto FAILURE; }
-#endif
-
- sfp->imode = (sfp->flags & SSI_USE64_INDEX) ? SSI_OFFSET_I64 : SSI_OFFSET_I32;
- sfp->smode = (sfp->flags & SSI_USE64) ? SSI_OFFSET_I64 : SSI_OFFSET_I32;
-
- if (! read_i16(sfp->fp, &(sfp->nfiles))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->nprimary))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->nsecondary))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->flen))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->plen))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->slen))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->frecsize))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->precsize))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->srecsize))) goto FAILURE;
-
- if (! read_offset(sfp->fp, sfp->imode, &(sfp->foffset))) goto FAILURE;
- if (! read_offset(sfp->fp, sfp->imode, &(sfp->poffset))) goto FAILURE;
- if (! read_offset(sfp->fp, sfp->imode, &(sfp->soffset))) goto FAILURE;
-
- /* Read the file information and keep it.
- * We expect the number of files to be small, so reading it
- * once should be advantageous overall. If SSI ever had to
- * deal with large numbers of files, you'd probably want to
- * read file information on demand.
- */
- if (sfp->nfiles == 0) goto FAILURE;
- if ((sfp->filename=malloc(sizeof(char *) *sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
- for (i = 0; i < sfp->nfiles; i++) sfp->filename[i] = NULL;
- if ((sfp->fileformat=malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
- if ((sfp->fileflags =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
- if ((sfp->bpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
- if ((sfp->rpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
-
- for (i = 0; i < sfp->nfiles; i++)
- {
- /* We have to explicitly position, because header and file
- * records may expand in the future; frecsize and foffset
- * give us forwards compatibility.
- */
- if (indexfile_position(sfp, &(sfp->foffset), sfp->frecsize, i) !=0) goto FAILURE;
- if ((sfp->filename[i] =malloc(sizeof(char)*sfp->flen)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; }
- if (fread(sfp->filename[i],sizeof(char),sfp->flen, sfp->fp)!=sfp->flen) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->fileformat[i]))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->fileflags[i]))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->bpl[i]))) goto FAILURE;
- if (! read_i32(sfp->fp, &(sfp->rpl[i]))) goto FAILURE;
- }
-
- /* Success. Return 0.
- */
- return 0;
-
- FAILURE:
- /* Failure: free the damaged structure, return status code.
- */
- SSIClose(sfp);
- return status;
-}
-
-
-
-/* Function: SSIGetOffsetByName()
- * Date: SRE, Sun Dec 31 13:55:31 2000 [St. Louis]
- *
- * Purpose: Looks up the string {key} in the open index {sfp}.
- * {key} can be either a primary or secondary key. If {key}
- * is found, {*ret_fh} contains a unique handle on
- * the file that contains {key} (suitable for an SSIFileInfo()
- * call, or for comparison to the handle of the last file
- * that was opened for retrieval), and {offset} is filled
- * in with the offset in that file.
- *
- * Args: sfp - open index file
- * key - string to search for
- * ret_fh - RETURN: handle on file that key is in
- * ret_offset - RETURN: offset of the start of that key's record
- *
- * Returns: 0 on success.
- * non-zero on error.
- */
-int
-SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh,
- SSIOFFSET *ret_offset)
-{
- int status;
- sqd_uint16 fnum;
-
- /* Look in the primary keys.
- */
- status = binary_search(sfp, key, sfp->plen, &(sfp->poffset), sfp->precsize,
- sfp->nprimary);
- if (status == 0) {
- /* We found it as a primary key; get our data & return.
- */
- if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA;
- *ret_fh = (int) fnum;
- if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA;
-
- return 0; /* success! (we don't need the other key data) */
- } else if (status == SSI_ERR_NO_SUCH_KEY) {
- /* Not in the primary keys? OK, try the secondary keys.
- */
- if (sfp->nsecondary > 0) {
- char *pkey;
- status = binary_search(sfp, key, sfp->slen, &(sfp->soffset), sfp->srecsize,
- sfp->nsecondary);
- if (status != 0) return status;
- if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC;
- if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA;
-
- status = SSIGetOffsetByName(sfp, pkey, ret_fh, ret_offset);
- free(pkey);
- }
- return status;
-
- } else return status;
- /*NOTREACHED*/
-}
-
-/* Function: SSIGetOffsetByNumber()
- * Date: SRE, Mon Jan 1 19:42:42 2001 [St. Louis]
- *
- * Purpose: Looks up primary key #{n} in the open index {sfp}.
- * {n} ranges from 0..nprimary-1. When key #{n}
- * is found, {*ret_fh} contains a unique
- * handle on the file that contains {key} (suitable
- * for an SSIFileInfo() call, or for comparison to
- * the handle of the last file that was opened for retrieval),
- * and {offset} is filled in with the offset in that file.
- *
- * Args: sfp - open index file
- * n - primary key number to retrieve.
- * ret_fh - RETURN: handle on file that key is in
- * ret_offset - RETURN: offset of the start of that key's record
- *
- * Returns: 0 on success.
- * non-zero on error.
- */
-int
-SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh, SSIOFFSET *ret_offset)
-{
- sqd_uint16 fnum;
- char *pkey;
-
- if (n >= sfp->nprimary) return SSI_ERR_NO_SUCH_KEY;
- if (indexfile_position(sfp, &(sfp->poffset), sfp->precsize, n) != 0)
- return SSI_ERR_SEEK_FAILED;
-
- if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC;
- if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA;
- if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA;
- if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA;
- *ret_fh = fnum;
- free(pkey);
- return 0;
-}
-
-/* Function: SSIGetSubseqOffset()
- * Date:     SRE, Mon Jan  1 19:49:31 2001 [St. Louis]
- *
- * Purpose:  Implements SSI_FAST_SUBSEQ.
- *
- *           Looks up a primary or secondary {key} in the open
- *           index {sfp}, and asks for the nearest offset to a
- *           subsequence starting at position {requested_start}
- *           in the sequence (numbering the sequence 1..L).
- *           If {key} is found, on return {ret_fh} contains a
- *           unique handle on the file that contains {key}
- *           (suitable for an SSIFileInfo() call, or for comparison
- *           to the handle of the last file that was opened for
- *           retrieval); {record_offset} contains the disk offset
- *           to the start of the record; {data_offset} contains the
- *           disk offset either exactly at the requested residue, or
- *           at the start of the line containing the requested
- *           residue; {ret_actual_start} contains the coordinate
- *           (1..L) of the first valid residue at or after
- *           {data_offset}. {ret_actual_start} is <= {requested_start}.
- *
- * Args:     sfp             - open index file
- *           key             - primary or secondary key to find
- *           requested_start - residue we'd like to start at (1..L)
- *           ret_fh          - RETURN: handle for file the key is in
- *           record_offset   - RETURN: offset of entire record
- *           data_offset     - RETURN: offset of subseq (see above)
- *           ret_actual_start- RETURN: coord (1..L) of residue at data_offset
- *
- * Returns:  0 on success, non-zero on failure:
- *             whatever SSIGetOffsetByName() returned, if the lookup failed;
- *             SSI_ERR_NO_SUBSEQS - file isn't flagged SSI_FAST_SUBSEQ, or
- *                                  its bytes/residues per line are zero;
- *             SSI_ERR_NODATA     - a read from the index failed;
- *             SSI_ERR_RANGE      - {requested_start} is outside 1..L.
- */
-int
-SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start,
-                   int *ret_fh, SSIOFFSET *record_offset,
-                   SSIOFFSET *data_offset, int *ret_actual_start)
-{
-  int        status;
-  sqd_uint32 len;
-  int        r, b, i, l;  /* tmp variables for "clarity", to match docs */
-  int        exact;       /* TRUE if each residue can be located exactly */
-  int        within;      /* byte offset of the residue within its line  */
-
-  /* Look up the key. Rely on the fact that SSIGetOffsetByName()
-   * leaves the index file positioned at the rest of the data for this key.
-   */
-  status = SSIGetOffsetByName(sfp, key, ret_fh, record_offset);
-  if (status != 0) return status;
-
-  /* Check that we're allowed to do subseq lookup on that file.
-   */
-  if (! (sfp->fileflags[*ret_fh] & SSI_FAST_SUBSEQ))
-    return SSI_ERR_NO_SUBSEQS;
-
-  /* Read the data we need for subseq lookup
-   */
-  if (! read_offset(sfp->fp, sfp->smode, data_offset)) return SSI_ERR_NODATA;
-  if (! read_i32(sfp->fp, &len))                       return SSI_ERR_NODATA;
-
-  /* Set up tmp variables for clarity of equations below,
-   * and to make them match documentation (ssi-format.tex).
-   * Validate r before dividing by it, and require a 1-based start:
-   * i == 0 would otherwise land one byte before the sequence data.
-   */
-  r = sfp->rpl[*ret_fh];    /* residues per line */
-  b = sfp->bpl[*ret_fh];    /* bytes per line    */
-  i = requested_start;      /* start position 1..L */
-  if (r == 0 || b == 0)              return SSI_ERR_NO_SUBSEQS;
-  if (i < 1 || (sqd_uint32) i > len) return SSI_ERR_RANGE;
-  l = (i-1)/r;              /* data line # (0..) that the residue is on */
-
-  /* When b = r+1, there's nothing but sequence on each data line (and the
-   * line terminator), and we can find each residue precisely. Otherwise
-   * there's other stuff on seq lines, so the best we can do easily is to
-   * position at the start of the relevant line.
-   */
-  exact  = (b == r+1);
-  within = exact ? (i-1) % r : 0;
-
-  /* Do the l*b product in the width of the offset, so a large record in
-   * 64-bit mode cannot overflow int arithmetic.
-   */
-  if (sfp->smode == SSI_OFFSET_I32) {
-    data_offset->mode     = SSI_OFFSET_I32;
-    data_offset->off.i32 += (sqd_uint32) l * b + within;
-  } else if (sfp->smode == SSI_OFFSET_I64) {
-    data_offset->mode     = SSI_OFFSET_I64;
-    data_offset->off.i64 += (sqd_uint64) l * b + within;
-  }
-
-  /* yes, 1 + l*r is = 1 + (i-1)/r*r but it's not = i. that's an integer /. */
-  *ret_actual_start = exact ? requested_start : 1 + l*r;
-  return 0;
-}
-
-/* Function: SSISetFilePosition()
- * Date:     SRE, Tue Jan  2 09:13:46 2001 [St. Louis]
- *
- * Purpose:  Uses {offset} to set the file position for {fp}, usually an
- *           open sequence file, relative to the start of the file.
- *           Hides the details of system-dependent shenanigans necessary for
- *           file positioning in large (>2 GB) files.
- *
- *           Behaves just like fseek(fp, offset, SEEK_SET) for 32 bit
- *           offsets and <2 GB files.
- *
- *           Warning: if all else fails, in desperation, it will try to
- *           use fsetpos(). This requires making assumptions about fpos_t
- *           that may be unwarranted... assumptions that ANSI C prohibits
- *           me from making... though I believe the ./configure
- *           script robustly tests whether I can play with fpos_t like this.
- *
- * Args:     fp      - file to position.
- *           offset  - SSI offset relative to file start.
- *
- * Returns:  0 on success, nonzero on error:
- *             SSI_ERR_SEEK_FAILED - the positioning call failed;
- *             SSI_ERR_NO64BIT     - a 64-bit offset was given but no 64-bit
- *                                   positioning function was compiled in.
- */
-int
-SSISetFilePosition(FILE *fp, SSIOFFSET *offset)
-{
-  if (offset->mode == SSI_OFFSET_I32) {
-    if (fseek(fp, offset->off.i32, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;
-    return 0;
-  }
-
-  /* Any other mode is treated as a 64-bit offset. Exactly one of the
-   * branches below is compiled in; the final #else makes sure we never
-   * report success without having moved the file position.
-   */
-#ifndef HAS_64BIT_FILE_OFFSETS
-  return SSI_ERR_NO64BIT;
-#elif defined HAVE_FSEEKO && SIZEOF_OFF_T == 8
-  if (fseeko(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;
-  return 0;
-#elif defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8
-  if (fseeko64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;
-  return 0;
-#elif defined HAVE_FSEEK64
-  if (fseek64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED;
-  return 0;
-#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8
-  if (fsetpos(fp, &(offset->off.i64)) != 0) return SSI_ERR_SEEK_FAILED;
-  return 0;
-#else
-  return SSI_ERR_NO64BIT;
-#endif
-}
-
-
-/* Function: SSIFileInfo()
- * Date:     SRE, Tue Jan  2 10:31:01 2001 [St. Louis]
- *
- * Purpose:  Look up file number {fh} in the open index {sfp} and
- *           hand back that file's name in {ret_filename} and its
- *           format code in {ret_format}.
- *
- *           The name returned is the index's own copy of the string,
- *           not a duplicate: the caller must not free it. It is
- *           released when the index is closed with SSIClose(sfp).
- *
- * Args:     sfp          - open index file
- *           fh           - handle on file to look up
- *           ret_filename - RETURN: name of file {fh}
- *           ret_format   - RETURN: format of file {fh}
- *
- * Returns:  0 on success; SSI_ERR_BADARG if {fh} is not a valid
- *           file number for this index (outputs are left untouched).
- */
-int
-SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format)
-{
-  int status = SSI_ERR_BADARG;
-
-  if (fh >= 0 && fh < sfp->nfiles) {
-    *ret_filename = sfp->filename[fh];
-    *ret_format   = sfp->fileformat[fh];
-    status        = 0;
-  }
-  return status;
-}
-
-/* Function: SSIClose()
- * Date:     SRE, Sun Dec 31 14:56:37 2000 [St. Louis]
- *
- * Purpose:  Dispose of an open {SSIFILE *}: free its internal arrays,
- *           close its stream if one is open, and free the structure.
- *           Passing NULL is allowed and does nothing.
- *
- * Args:     sfp - index file to close (may be NULL).
- *
- * Returns:  (void)
- */
-void
-SSIClose(SSIFILE *sfp)
-{
-  if (sfp == NULL) return;
-
-  clear_ssifile(sfp);
-  if (sfp->fp != NULL) fclose(sfp->fp);
-  free(sfp);
-}
-/* clear_ssifile(): release the per-file arrays held inside an SSIFILE
- * (names, formats, flags, bytes-per-line, residues-per-line). The
- * structure itself and its stream are left alone; the pointers are
- * not reset, so the caller is expected to discard or refill them.
- */
-static void
-clear_ssifile(SSIFILE *sfp)
-{
-  int idx;
-
-  if (sfp->filename != NULL) {
-    for (idx = 0; idx < sfp->nfiles; idx++)
-      free(sfp->filename[idx]);       /* free(NULL) is a no-op */
-    free(sfp->filename);
-  }
-  free(sfp->fileformat);
-  free(sfp->fileflags);
-  free(sfp->bpl);
-  free(sfp->rpl);
-}
-
-
-/* Function: SSIRecommendMode()
- * Date:     SRE, Fri Feb 16 08:23:47 2001 [St. Louis]
- *
- * Purpose:  Examines the file and determines whether it should be
- *           indexed with large file support or not; returns
- *           SSI_OFFSET_I32 for most files, SSI_OFFSET_I64 for large
- *           files, or -1 on failure.
- *
- * Args:     file - name of file to check for size
- *
- * Returns:  -1 if the file could not be stat'ed (which may include
- *              the case where the file is too big for stat())
- *           SSI_OFFSET_I32 for most files (<= 2^31-1 bytes)
- *           SSI_OFFSET_I64 for large files (> 2^31-1 bytes)
- */
-int
-SSIRecommendMode(char *file)
-{
-  /* Only the type of the stat buffer and the call differ between the
-   * two configurations; the size test is shared.
-   */
-#if HAVE_STAT64
-  struct stat64 sb;
-  if (stat64(file, &sb) != 0) return -1;
-#else
-  struct stat sb;
-  if (stat(file, &sb) != 0) return -1;
-#endif
-
-  /* 2147483647 = 2^31-1, the largest offset a signed 32-bit value holds. */
-  return (sb.st_size <= 2147483647L) ? SSI_OFFSET_I32 : SSI_OFFSET_I64;
-}
-
-
-/* Function: SSICreateIndex()
- * Date:     SRE, Tue Jan  2 11:23:25 2001 [St. Louis]
- *
- * Purpose:  Creates and initializes a SSI index structure.
- *           Sequence file offset type is specified by {mode}.
- *           The index itself always starts out in 32-bit mode and
- *           in-memory (non-external) key storage.
- *
- *           If {mode} is SSI_OFFSET_I64 on a build without
- *           HAS_64BIT_FILE_OFFSETS, Die() is called.
- *
- * Args:     mode - SSI_OFFSET_I32 or SSI_OFFSET_I64, sequence file index mode.
- *
- * Returns:  ptr to new index structure, or NULL on failure.
- *           Caller is responsible for free'ing the returned
- *           structure with SSIFreeIndex().
- */
-SSIINDEX *
-SSICreateIndex(int mode)
-{
-  SSIINDEX *g;
-
-  g = NULL;
-  if ((g = malloc(sizeof(SSIINDEX))) == NULL) goto FAILURE;
-  g->smode    = mode;
-  g->imode    = SSI_OFFSET_I32;  /* index always starts as 32-bit; may get upgraded later */
-  g->external = FALSE;
-  g->max_ram  = SSI_MAXRAM;
-
-#ifndef HAS_64BIT_FILE_OFFSETS
-  if (mode == SSI_OFFSET_I64)
-    Die("\
-Can't create a 64-bit SSI index on this system, sorry;\n\
-I don't have 64-bit file offset functions available.\n");
-#endif
-
-  g->filenames  = NULL;
-  g->fileformat = NULL;
-  g->bpl        = NULL;
-  g->rpl        = NULL;
-  g->flen       = 0;
-  g->nfiles     = 0;
-
-  g->pkeys      = NULL;
-  g->plen       = 0;
-  g->nprimary   = 0;
-  g->ptmpfile   = "tmp.ssi.1";   /* hardcoded, for now. */
-  g->ptmp       = NULL;
-
-  g->skeys      = NULL;
-  g->slen       = 0;
-  g->nsecondary = 0;
-  g->stmpfile   = "tmp.ssi.2";   /* hardcoded, for now. */
-  g->stmp       = NULL;
-
-  /* All mallocs must go after NULL initializations, because of the cleanup strategy;
-   * we'll try to free anything non-NULL if a malloc fails.
-   */
-  if ((g->filenames = malloc(sizeof(char *)     * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
-  if ((g->fileformat= malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
-  if ((g->bpl       = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
-  if ((g->rpl       = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE;
-
-  /* Each key array is sized by its own element type: primary keys are
-   * struct ssipkey_s, secondary keys are struct ssiskey_s (the same
-   * types the later realloc() and qsort() calls use).
-   */
-  if ((g->pkeys = malloc(sizeof(struct ssipkey_s) * SSI_KEY_BLOCK)) == NULL) goto FAILURE;
-  if ((g->skeys = malloc(sizeof(struct ssiskey_s) * SSI_KEY_BLOCK)) == NULL) goto FAILURE;
-
-  return g;
-
- FAILURE:
-  SSIFreeIndex(g);               /* free the damaged structure */
-  return NULL;
-}
-
-/* Function: SSIGetFilePosition()
- * Date:     SRE, Tue Jan  2 09:59:26 2001 [St. Louis]
- *
- * Purpose:  Fills {ret_offset} with the current disk
- *           offset of {fp}, relative to the start of the file.
- *           {mode} is set to either SSI_OFFSET_I32 or
- *           SSI_OFFSET_I64. If {mode} is _I32 (32 bit), just wraps
- *           a call to ftell(); otherwise deals with system-dependent
- *           details of 64-bit file offsets.
- *
- *           Any other {mode} is a coding error and calls abort().
- *
- * Args:     fp         - open stream
- *           mode       - SSI_OFFSET_I32 or SSI_OFFSET_I64
- *           ret_offset - RETURN: file position
- *
- * Returns:  0 on success. nonzero on error:
- *             SSI_ERR_TELL_FAILED - the position query failed;
- *             SSI_ERR_NO64BIT     - 64-bit mode was asked for but no 64-bit
- *                                   position function was compiled in.
- */
-int
-SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset)
-{
-  if (mode == SSI_OFFSET_I32)
-    {
-      /* Test ftell()'s own return value before narrowing it to 32 bits. */
-      long pos = ftell(fp);
-
-      ret_offset->mode = SSI_OFFSET_I32;
-      if (pos == -1L) return SSI_ERR_TELL_FAILED;
-      ret_offset->off.i32 = pos;
-      return 0;
-    }
-  if (mode != SSI_OFFSET_I64) abort(); /* only happens on a coding error */
-
-  ret_offset->mode = SSI_OFFSET_I64;
-
-  /* Exactly one branch below is compiled in; the final #else makes sure
-   * we never report success with {ret_offset} left unfilled.
-   */
-#ifndef HAS_64BIT_FILE_OFFSETS
-  return SSI_ERR_NO64BIT;
-#elif defined HAVE_FTELLO && SIZEOF_OFF_T == 8
-  if ((ret_offset->off.i64 = ftello(fp)) == -1) return SSI_ERR_TELL_FAILED;
-  return 0;
-#elif defined HAVE_FTELLO64 && SIZEOF_OFF64_T == 8
-  if ((ret_offset->off.i64 = ftello64(fp)) == -1) return SSI_ERR_TELL_FAILED;
-  return 0;
-#elif defined HAVE_FTELL64
-  if ((ret_offset->off.i64 = ftell64(fp)) == -1) return SSI_ERR_TELL_FAILED;
-  return 0;
-#elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8
-  if (fgetpos(fp, &(ret_offset->off.i64)) != 0) return SSI_ERR_TELL_FAILED;
-  return 0;
-#else
-  return SSI_ERR_NO64BIT;
-#endif
-}
-
-/* Function: SSIAddFileToIndex()
- * Date:     SRE, Tue Jan  2 12:54:36 2001 [St. Louis]
- *
- * Purpose:  Adds the sequence file {filename}, which is known to
- *           be in format {fmt}, to the index {g}. Creates and returns
- *           a unique filehandle {fh} for then associating primary keys
- *           with this file using SSIAddPrimaryKeyToIndex().
- *
- *           The name stored is what FileTail(filename, FALSE) returns.
- *           The file starts with bpl = rpl = 0, i.e. without fast
- *           subsequence support; see SSISetFileForSubseq().
- *
- * Args:     g         - active index
- *           filename  - file to add
- *           fmt       - format code for this file (e.g. SQFILE_FASTA)
- *           ret_fh    - RETURN: unique handle for this file
- *
- * Returns:  0 on success; nonzero on error:
- *             SSI_ERR_TOOMANY_FILES - index already holds SSI_MAXFILES files;
- *             SSI_ERR_MALLOC        - could not grow the file arrays. The
- *                                     file is then not added, and the index
- *                                     is left as it was (apart from {flen}).
- */
-int
-SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh)
-{
-  int    n;
-  size_t newsize;               /* # of slots after growing */
-  void  *tmp;                   /* realloc() result, checked before use */
-
-  if (g->nfiles >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES;
-
-  n = strlen(filename);
-  if ((n+1) > g->flen) g->flen = n+1;
-
-  g->filenames[g->nfiles]  = FileTail(filename, FALSE);
-  g->fileformat[g->nfiles] = fmt;
-  g->bpl[g->nfiles]        = 0;
-  g->rpl[g->nfiles]        = 0;
-  *ret_fh                  = g->nfiles;   /* handle is simply = file number */
-  g->nfiles++;
-
-  /* The arrays always keep at least one free slot. When the last slot was
-   * just used, grow all four by another block. Never assign realloc()'s
-   * result straight to the array pointer: on failure that would lose (and
-   * leak) the old array and leave the index unsafe to free.
-   */
-  if (g->nfiles % SSI_FILE_BLOCK == 0) {
-    newsize = g->nfiles + SSI_FILE_BLOCK;
-
-    if ((tmp = realloc(g->filenames, sizeof(char *) * newsize)) == NULL) goto FAILURE;
-    g->filenames = tmp;
-    if ((tmp = realloc(g->fileformat, sizeof(sqd_uint32) * newsize)) == NULL) goto FAILURE;
-    g->fileformat = tmp;
-    if ((tmp = realloc(g->bpl, sizeof(sqd_uint32) * newsize)) == NULL) goto FAILURE;
-    g->bpl = tmp;
-    if ((tmp = realloc(g->rpl, sizeof(sqd_uint32) * newsize)) == NULL) goto FAILURE;
-    g->rpl = tmp;
-  }
-  return 0;
-
- FAILURE:
-  /* Undo the add so the "one free slot" invariant still holds. */
-  g->nfiles--;
-  free(g->filenames[g->nfiles]);
-  g->filenames[g->nfiles] = NULL;
-  return SSI_ERR_MALLOC;
-}
-
-
-/* Function: SSISetFileForSubseq()
- * Date:     SRE, Tue Jan  9 10:02:05 2001 [St. Louis]
- *
- * Purpose:  Record, for the file with handle {fh} in index {g}, the
- *           line geometry needed for SSI_FAST_SUBSEQ:
- *           {bpl} bytes and {rpl} residues per sequence data line.
- *
- *           The caller vouches that both values are constant over
- *           every data line of every record in the file (the last
- *           line of each record excepted). If the file does not have
- *           that property, fast subsequence retrieval cannot work
- *           and this routine must not be called for it.
- *
- * Args:     g    - the active index
- *           fh   - handle for file to set SSI_FAST_SUBSEQ on
- *           bpl  - bytes per data line    (must be > 0)
- *           rpl  - residues per data line (must be > 0)
- *
- * Returns:  0 on success; SSI_ERR_BADARG if {fh} is not a file in
- *           the index or if {bpl} or {rpl} is not positive.
- */
-int
-SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl)
-{
-  if (fh < 0 || fh >= g->nfiles || bpl <= 0 || rpl <= 0)
-    return SSI_ERR_BADARG;
-
-  g->rpl[fh] = rpl;
-  g->bpl[fh] = bpl;
-  return 0;
-}
-
-
-/* Function: SSIAddPrimaryKeyToIndex()
- * Date:     SRE, Tue Jan  2 11:50:54 2001 [St. Louis]
- *
- * Purpose:  Put primary key {key} in the index {g}, while telling
- *           the index this primary key is in the file associated
- *           with filehandle {fh} (returned by a previous call
- *           to SSIAddFileToIndex()), and its record starts at
- *           position {r_off} in the file.
- *
- *           {d_off} and {L} are optional; they may be left unset
- *           by passing NULL and 0, respectively. (If one is
- *           provided, both must be provided; passing L > 0 with a
- *           NULL {d_off} calls abort().) If they are provided,
- *           {d_off} gives the position of the first line of sequence
- *           data in the record, and {L} gives the length of
- *           the sequence in residues. They are used when
- *           SSI_FAST_SUBSEQ is set for this file. If SSI_FAST_SUBSEQ
- *           is not set for the file, {d_off} and {L} will be
- *           ignored by the index reading API even if they are stored
- *           by the index writing API, so it doesn't hurt for the
- *           indexing program to provide them; typically they
- *           won't know whether it's safe to set SSI_FAST_SUBSEQ
- *           for the whole file until the whole file has been
- *           read and every key has already been added to the index.
- *
- *           Once current_index_size(g) reaches g->max_ram, the index
- *           switches to external mode and keys are appended to a
- *           temporary file instead of being held in memory.
- *
- * Args:     g      - active index
- *           key    - primary key to add
- *           fh     - handle on file that this key's in
- *           r_off  - offset to start of record
- *           d_off  - offset to start of sequence data, or NULL
- *           L      - length of sequence, or 0
- *
- * Returns:  0 on success, nonzero on error:
- *             SSI_ERR_BADARG        - {fh} is negative;
- *             SSI_ERR_TOOMANY_FILES - {fh} is >= SSI_MAXFILES;
- *             SSI_ERR_TOOMANY_KEYS  - index already holds SSI_MAXKEYS keys;
- *             SSI_ERR_NOFILE        - couldn't switch to external mode;
- *             SSI_ERR_FWRITE        - write to the temporary file failed;
- *             SSI_ERR_MALLOC        - out of memory; the key is not added.
- */
-int
-SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh,
-                        SSIOFFSET *r_off, SSIOFFSET *d_off, int L)
-{
-  int               n;          /* a string length */
-  int               nwritten;   /* fprintf() result in external mode */
-  struct ssipkey_s *slot;       /* in-memory record being filled in */
-  void             *tmp;        /* realloc() result, checked before use */
-
-  if (fh < 0)                     return SSI_ERR_BADARG;
-  if (fh >= SSI_MAXFILES)         return SSI_ERR_TOOMANY_FILES;
-  if (g->nprimary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS;
-  if (L > 0 && d_off == NULL) abort(); /* need both. */
-
-  /* Before adding the key: check how big our index is.
-   * If it's getting too large, switch to external mode.
-   */
-  if (!g->external && current_index_size(g) >= g->max_ram)
-    if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE;
-
-  /* Update maximum pkey length, if needed.
-   */
-  n = strlen(key);
-  if ((n+1) > g->plen) g->plen = n+1;
-
-  /* External mode? Simply append to disk...
-   */
-  if (g->external) {
-    if (g->smode == SSI_OFFSET_I32) {
-      nwritten = fprintf(g->ptmp, "%s\t%d\t%lu\t%lu\t%lu\n",
-                         key, fh, (unsigned long) r_off->off.i32,
-                         (unsigned long) (d_off == NULL? 0 : d_off->off.i32),
-                         (unsigned long) L);
-    } else {
-      nwritten = fprintf(g->ptmp, "%s\t%d\t%llu\t%llu\t%lu\n",
-                         key, fh, (unsigned long long) r_off->off.i64,
-                         (unsigned long long) (d_off == NULL? 0 : d_off->off.i64),
-                         (unsigned long) L);
-    }
-    if (nwritten < 0) return SSI_ERR_FWRITE;
-    g->nprimary++;
-    return 0;
-  }
-
-  /* Else: internal mode, keep keys in memory...
-   */
-  slot = &(g->pkeys[g->nprimary]);
-  if ((slot->key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC;
-  slot->fnum  = (sqd_uint16) fh;
-  slot->r_off = *r_off;
-  if (d_off != NULL && L > 0) {
-    slot->d_off = *d_off;
-    slot->len   = L;
-  } else {
-    /* yeah, this looks stupid, but look: we have to give a valid
-       looking, non-NULL d_off of some sort, or writes will fail.
-       It's going to be unused anyway. */
-    slot->d_off = *r_off;
-    slot->len   = 0;
-  }
-  g->nprimary++;
-
-  /* Keep at least one free slot. Don't assign realloc()'s result directly:
-   * on failure g->pkeys would become NULL while nprimary > 0, and
-   * SSIFreeIndex() would then walk a NULL array.
-   */
-  if (g->nprimary % SSI_KEY_BLOCK == 0) {
-    tmp = realloc(g->pkeys, sizeof(struct ssipkey_s) * (g->nprimary+SSI_KEY_BLOCK));
-    if (tmp == NULL) {
-      g->nprimary--;                       /* undo the add */
-      free(g->pkeys[g->nprimary].key);
-      g->pkeys[g->nprimary].key = NULL;
-      return SSI_ERR_MALLOC;
-    }
-    g->pkeys = tmp;
-  }
-  return 0;
-}
-
-
-/* Function: SSIAddSecondaryKeyToIndex()
- * Date:     SRE, Tue Jan  2 12:44:40 2001 [St. Louis]
- *
- * Purpose:  Puts secondary key {key} in the index {g}, associating
- *           it with primary key {pkey} that was previously
- *           registered by SSIAddPrimaryKeyToIndex().
- *
- *           Once current_index_size(g) reaches g->max_ram, the index
- *           switches to external mode and keys are appended to a
- *           temporary file instead of being held in memory.
- *
- * Args:     g    - active index
- *           key  - secondary key to add
- *           pkey - primary key to associate this key with
- *
- * Returns:  0 on success, nonzero on failure:
- *             SSI_ERR_TOOMANY_KEYS - index already holds SSI_MAXKEYS keys;
- *             SSI_ERR_NOFILE       - couldn't switch to external mode;
- *             SSI_ERR_FWRITE       - write to the temporary file failed;
- *             SSI_ERR_MALLOC       - out of memory; the key is not added.
- */
-int
-SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey)
-{
-  int               n;          /* a string length */
-  struct ssiskey_s *slot;       /* in-memory record being filled in */
-  void             *tmp;        /* realloc() result, checked before use */
-
-  if (g->nsecondary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS;
-
-  /* Before adding the key: check how big our index is.
-   * If it's getting too large, switch to external mode.
-   */
-  if (!g->external && current_index_size(g) >= g->max_ram)
-    if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE;
-
-  /* Update maximum secondary key length, if necessary.
-   */
-  n = strlen(key);
-  if ((n+1) > g->slen) g->slen = n+1;
-
-  /* if external mode: write info to disk.
-   */
-  if (g->external) {
-    if (fprintf(g->stmp, "%s\t%s\n", key, pkey) < 0) return SSI_ERR_FWRITE;
-    g->nsecondary++;
-    return 0;
-  }
-
-  /* else, internal mode... store info in memory.
-   * If the second copy fails, release the first so nothing leaks and
-   * the slot is left empty.
-   */
-  slot = &(g->skeys[g->nsecondary]);
-  if ((slot->key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC;
-  if ((slot->pkey = sre_strdup(pkey, -1)) == NULL) {
-    free(slot->key);
-    slot->key = NULL;
-    return SSI_ERR_MALLOC;
-  }
-  g->nsecondary++;
-
-  /* Keep at least one free slot. Don't assign realloc()'s result directly:
-   * on failure g->skeys would become NULL while nsecondary > 0.
-   */
-  if (g->nsecondary % SSI_KEY_BLOCK == 0) {
-    tmp = realloc(g->skeys, sizeof(struct ssiskey_s) * (g->nsecondary+SSI_KEY_BLOCK));
-    if (tmp == NULL) {
-      g->nsecondary--;                     /* undo the add */
-      free(g->skeys[g->nsecondary].key);
-      free(g->skeys[g->nsecondary].pkey);
-      g->skeys[g->nsecondary].key  = NULL;
-      g->skeys[g->nsecondary].pkey = NULL;
-      return SSI_ERR_MALLOC;
-    }
-    g->skeys = tmp;
-  }
-  return 0;
-}
-
-
-
-
-/* Function: SSIWriteIndex()
- * Date: SRE, Tue Jan 2 13:55:56 2001 [St. Louis]
- *
- * Purpose: Writes complete index {g} in SSI format to a
- * binary file {file}. Does all
- * the overhead of sorting the primary and secondary keys,
- * and maintaining the association of secondary keys
- * with primary keys during and after the sort.
- *
- * Args: file - file to write to
- * g - index to sort & write out.
- *
- * Returns: 0 on success, nonzero on error.
- */
-/* pkeysort(): qsort() comparison callback for arrays of struct ssipkey_s.
- * Orders two primary-key records by strcmp() of their key strings.
- */
-static int
-pkeysort(const void *k1, const void *k2)
-{
-  const struct ssipkey_s *lhs = k1;
-  const struct ssipkey_s *rhs = k2;
-
-  return strcmp(lhs->key, rhs->key);
-}
-/* skeysort(): qsort() comparison callback for arrays of struct ssiskey_s.
- * Orders two secondary-key records by strcmp() of their key strings.
- */
-static int
-skeysort(const void *k1, const void *k2)
-{
-  const struct ssiskey_s *lhs = k1;
-  const struct ssiskey_s *rhs = k2;
-
-  return strcmp(lhs->key, rhs->key);
-}
-/* write_fixed_field(): write string {str} to {fp} as a fixed-width field
- * of exactly {width} bytes, using {buf} (at least {width} bytes) as
- * scratch space. The field is zero-padded after the string and always
- * NUL-terminated; a string too long for the field is truncated rather
- * than overrunning {buf}. Returns 1 on success, 0 if the write failed.
- */
-static int
-write_fixed_field(FILE *fp, char *buf, sqd_uint32 width, const char *str)
-{
-  if (width == 0) return 1;              /* nothing to write */
-  strncpy(buf, str, width);              /* copies and zero-pads to width */
-  buf[width-1] = '\0';
-  return (fwrite(buf, sizeof(char), width, fp) == width);
-}
-
-/* SSIWriteIndex(): sort the keys of index {g} and write the complete
- * index to binary file {file} in SSI format.
- *
- * Returns 0 on success, or a nonzero SSI_ERR_* code on failure
- * (SSI_ERR_NOFILE, SSI_ERR_EXTERNAL_SORT, SSI_ERR_MALLOC, SSI_ERR_NODATA,
- * SSI_ERR_BADFORMAT, SSI_ERR_FWRITE). On every path the output stream is
- * closed and all scratch buffers are freed before returning.
- */
-int
-SSIWriteIndex(char *file, SSIINDEX *g)
-{
-  FILE      *fp;
-  int        status;
-  int        i;
-  sqd_uint32 header_flags, file_flags;
-  sqd_uint32 frecsize, precsize, srecsize;
-  sqd_uint64 foffset, poffset, soffset;
-  char      *fbuf    = NULL;    /* scratch field for file names      */
-  char      *pbuf    = NULL;    /* scratch field for primary keys    */
-  char      *sbuf    = NULL;    /* scratch field for secondary keys  */
-  char      *line    = NULL;    /* sre_fgets() buffer, external mode */
-  int        linelen = 0;
-
-  if ((fp = fopen(file,"wb")) == NULL) return SSI_ERR_NOFILE;
-  status = 0;
-
-  /* How big is the index? If it's going to be > 2GB, we need
-   * to flip to 64-bit index mode. 2047 (instead of 2048) gives us
-   * some slop room.
-   * die'ing here is pretty brutal - if we flip to 64-bit index
-   * mode, we have 100's of millions of keys, so we've processed
-   * a long time before reaching this point. Ah well.
-   */
-  if (current_index_size(g) >= 2047) {
-    g->imode = SSI_OFFSET_I64;
-#ifndef HAS_64BIT_FILE_OFFSETS
-    Die("\
-Can't switch to 64-bit SSI index mode on this system, sorry;\n\
-I don't have 64-bit file offset functions available.\n");
-#endif
-  }
-
-  /* Magic-looking numbers come from adding up sizes
-   * of things in bytes
-   */
-  frecsize = 16 + g->flen;
-  precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen;
-  srecsize = g->slen + g->plen;
-
-  header_flags = 0;
-  if (g->smode == SSI_OFFSET_I64) header_flags |= SSI_USE64;
-  if (g->imode == SSI_OFFSET_I64) header_flags |= SSI_USE64_INDEX;
-
-  /* Magic-looking numbers again come from adding up sizes
-   * of things in bytes. The section sizes are computed in 64 bits:
-   * record size times key count can exceed 32 bits for a large index.
-   */
-  foffset = (header_flags & SSI_USE64_INDEX) ? 66 : 54;
-  poffset = foffset + (sqd_uint64) frecsize * g->nfiles;
-  soffset = poffset + (sqd_uint64) precsize * g->nprimary;
-
-  /* Sort the keys
-   * If external mode, make system calls to UNIX/POSIX "sort" in place, then
-   * open new sorted files for reading thru ptmp and stmp handles.
-   * If internal mode, call qsort.
-   *
-   * Note that you'd better force a POSIX locale for the sort; else,
-   * some silly distro (e.g. Mandrake Linux >=8.1) may have specified
-   * LC_COLLATE=en_US, and this'll give a sort "bug" in which it doesn't
-   * sort by byte order.
-   */
-  if (g->external) {
-    char cmd[1024];   /* ample for the short tmp file names set in SSICreateIndex() */
-
-    status = SSI_ERR_EXTERNAL_SORT;
-
-    fclose(g->ptmp);
-    g->ptmp = NULL;
-    sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->ptmpfile, g->ptmpfile);
-    if (system(cmd) != 0)                               goto DONE;
-    if ((g->ptmp = fopen(g->ptmpfile, "r")) == NULL)    goto DONE;
-
-    fclose(g->stmp);
-    g->stmp = NULL;
-    sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->stmpfile, g->stmpfile);
-    if (system(cmd) != 0)                               goto DONE;
-    if ((g->stmp = fopen(g->stmpfile, "r")) == NULL)    goto DONE;
-  } else {
-    qsort((void *) g->pkeys, g->nprimary,   sizeof(struct ssipkey_s), pkeysort);
-    qsort((void *) g->skeys, g->nsecondary, sizeof(struct ssiskey_s), skeysort);
-  }
-
-  /* Write the header. Any failure in this section is a write failure.
-   */
-  status = SSI_ERR_FWRITE;
-  if (! write_i32(fp, v20magic))      goto DONE;
-  if (! write_i32(fp, header_flags))  goto DONE;
-  if (! write_i16(fp, g->nfiles))     goto DONE;
-  if (! write_i32(fp, g->nprimary))   goto DONE;
-  if (! write_i32(fp, g->nsecondary)) goto DONE;
-  if (! write_i32(fp, g->flen))       goto DONE;
-  if (! write_i32(fp, g->plen))       goto DONE;
-  if (! write_i32(fp, g->slen))       goto DONE;
-  if (! write_i32(fp, frecsize))      goto DONE;
-  if (! write_i32(fp, precsize))      goto DONE;
-  if (! write_i32(fp, srecsize))      goto DONE;
-  if (g->imode == SSI_OFFSET_I32) {
-    if (! write_i32(fp, foffset))     goto DONE;
-    if (! write_i32(fp, poffset))     goto DONE;
-    if (! write_i32(fp, soffset))     goto DONE;
-  } else {
-    if (! write_i64(fp, foffset))     goto DONE;
-    if (! write_i64(fp, poffset))     goto DONE;
-    if (! write_i64(fp, soffset))     goto DONE;
-  }
-
-  /* The file section. (Allocate at least one byte so an index with
-   * no files doesn't depend on what malloc(0) returns.)
-   */
-  if ((fbuf = malloc(sizeof(char) * (g->flen > 0 ? g->flen : 1))) == NULL)
-    { status = SSI_ERR_MALLOC; goto DONE; }
-  status = SSI_ERR_FWRITE;
-  for (i = 0; i < g->nfiles; i++)
-    {
-      file_flags = 0;
-      if (g->bpl[i] > 0 && g->rpl[i] > 0) file_flags |= SSI_FAST_SUBSEQ;
-
-      if (! write_fixed_field(fp, fbuf, g->flen, g->filenames[i])) goto DONE;
-      if (! write_i32(fp, g->fileformat[i]))                       goto DONE;
-      if (! write_i32(fp, file_flags))                             goto DONE;
-      if (! write_i32(fp, g->bpl[i]))                              goto DONE;
-      if (! write_i32(fp, g->rpl[i]))                              goto DONE;
-    }
-
-  /* The primary key section
-   */
-  if ((pbuf = malloc(sizeof(char) * (g->plen > 0 ? g->plen : 1))) == NULL)
-    { status = SSI_ERR_MALLOC; goto DONE; }
-  if (g->external) {
-    struct ssipkey_s pkey;
-
-    for (i = 0; i < g->nprimary; i++)
-      {
-        if (sre_fgets(&line, &linelen, g->ptmp) == NULL)
-          { status = SSI_ERR_NODATA; goto DONE; }
-        if (parse_pkey_info(line, g->smode, &pkey) != 0)
-          { status = SSI_ERR_BADFORMAT; goto DONE; }
-
-        status = SSI_ERR_FWRITE;
-        if (! write_fixed_field(fp, pbuf, g->plen, pkey.key)) goto DONE;
-        if (! write_i16(   fp, pkey.fnum))                    goto DONE;
-        if (! write_offset(fp, &(pkey.r_off)))                goto DONE;
-        if (! write_offset(fp, &(pkey.d_off)))                goto DONE;
-        if (! write_i32(   fp, pkey.len))                     goto DONE;
-      }
-  } else {
-    status = SSI_ERR_FWRITE;
-    for (i = 0; i < g->nprimary; i++)
-      {
-        if (! write_fixed_field(fp, pbuf, g->plen, g->pkeys[i].key)) goto DONE;
-        if (! write_i16(   fp, g->pkeys[i].fnum))                    goto DONE;
-        if (! write_offset(fp, &(g->pkeys[i].r_off)))                goto DONE;
-        if (! write_offset(fp, &(g->pkeys[i].d_off)))                goto DONE;
-        if (! write_i32(   fp, g->pkeys[i].len))                     goto DONE;
-      }
-  }
-
-  /* The secondary key section. Each record is the secondary key in an
-   * slen-wide field followed by its primary key in a plen-wide field.
-   */
-  if (g->nsecondary > 0) {
-    if ((sbuf = malloc(sizeof(char) * g->slen)) == NULL)
-      { status = SSI_ERR_MALLOC; goto DONE; }
-
-    if (g->external) {
-      struct ssiskey_s skey;
-
-      for (i = 0; i < g->nsecondary; i++)
-        {
-          if (sre_fgets(&line, &linelen, g->stmp) == NULL)
-            { status = SSI_ERR_NODATA; goto DONE; }
-          if (parse_skey_info(line, &skey) != 0)
-            { status = SSI_ERR_BADFORMAT; goto DONE; }
-
-          status = SSI_ERR_FWRITE;
-          if (! write_fixed_field(fp, sbuf, g->slen, skey.key))  goto DONE;
-          if (! write_fixed_field(fp, pbuf, g->plen, skey.pkey)) goto DONE;
-        }
-    } else {
-      status = SSI_ERR_FWRITE;
-      for (i = 0; i < g->nsecondary; i++)
-        {
-          if (! write_fixed_field(fp, sbuf, g->slen, g->skeys[i].key))  goto DONE;
-          if (! write_fixed_field(fp, pbuf, g->plen, g->skeys[i].pkey)) goto DONE;
-        }
-    }
-  }
-
-  status = 0;
-
- DONE:
-  free(fbuf);
-  free(pbuf);
-  free(sbuf);
-  free(line);
-  /* fclose() flushes buffered output; a failure there is a write failure. */
-  if (fclose(fp) != 0 && status == 0) status = SSI_ERR_FWRITE;
-  return status;
-}
-
-
-/* Function: SSIFreeIndex()
- * Date:     SRE, Tue Jan  2 11:44:08 2001 [St. Louis]
- *
- * Purpose:  Dispose of index structure {g} and everything it owns.
- *
- *           In internal mode, the key strings and the two key arrays
- *           are freed. In external mode, the temporary key files are
- *           closed if open and (when DEBUGLEVEL is 0) removed from
- *           disk. In both modes the per-file arrays and {g} itself
- *           are then freed. Passing NULL is allowed and does nothing.
- *
- * Args:     g  - ptr to an open index (may be NULL).
- *
- * Returns:  (void)
- */
-void
-SSIFreeIndex(SSIINDEX *g)
-{
-  int k;
-
-  if (g == NULL) return;
-
-  if (g->external != FALSE) {
-    /* keys live in temporary files */
-    if (g->ptmp != NULL) fclose(g->ptmp);
-    if (g->stmp != NULL) fclose(g->stmp);
-#if DEBUGLEVEL == 0
-    remove(g->ptmpfile);
-    remove(g->stmpfile);
-#endif
-  } else {
-    /* keys live in memory */
-    for (k = 0; k < g->nprimary; k++)
-      free(g->pkeys[k].key);
-    for (k = 0; k < g->nsecondary; k++) {
-      free(g->skeys[k].key);
-      free(g->skeys[k].pkey);
-    }
-    free(g->pkeys);             /* free(NULL) is a no-op */
-    free(g->skeys);
-  }
-
-  for (k = 0; k < g->nfiles; k++)
-    free(g->filenames[k]);
-  free(g->filenames);
-  free(g->fileformat);
-  free(g->bpl);
-  free(g->rpl);
-  free(g);
-}
-
-
-/* Function: SSIErrorString()
- * Date:     SRE, Tue Jan  2 10:38:10 2001 [St. Louis]
- *
- * Purpose:  Translate status code {n}, as returned by the functions
- *           in this API, into a short human-readable message.
- *           Codes that are not recognized map to "unrecognized code".
- *
- * Args:     n - error code
- *
- * Returns:  ptr to a string constant; the caller must not modify
- *           or free it.
- */
-char *
-SSIErrorString(int n)
-{
-  char *msg;
-
-  switch (n) {
-  case SSI_ERR_OK:            msg = "ok (no error)";                            break;
-  case SSI_ERR_NODATA:        msg = "no data, fread() failed";                  break;
-  case SSI_ERR_NO_SUCH_KEY:   msg = "no such key";                              break;
-  case SSI_ERR_MALLOC:        msg = "out of memory, malloc() failed";           break;
-  case SSI_ERR_NOFILE:        msg = "file not found, fopen() failed";           break;
-  case SSI_ERR_BADMAGIC:      msg = "not a SSI file? (bad magic)";              break;
-  case SSI_ERR_BADFORMAT:     msg = "corrupt format? unexpected data";          break;
-  case SSI_ERR_NO64BIT:       msg = "no large file support for this system";    break;
-  case SSI_ERR_SEEK_FAILED:   msg = "failed to reposition on disk";             break;
-  case SSI_ERR_TELL_FAILED:   msg = "failed to get file position on disk";      break;
-  case SSI_ERR_NO_SUBSEQS:    msg = "no fast subseq support for this seqfile";  break;
-  case SSI_ERR_RANGE:         msg = "subseq start is out of range";             break;
-  case SSI_ERR_BADARG:        msg = "an argument is out of range";              break;
-  case SSI_ERR_TOOMANY_FILES: msg = "number of files exceeds limit";            break;
-  case SSI_ERR_TOOMANY_KEYS:  msg = "number of keys exceeds limit";             break;
-  case SSI_ERR_FWRITE:        msg = "an fwrite() failed";                       break;
-  case SSI_ERR_EXTERNAL_SORT: msg = "some problem with external sorting";       break;
-  default:                    msg = "unrecognized code";                        break;
-  }
-  return msg;
-}
-
-/* read_i16(): read one 16-bit value from {fp} and store it in
- * {ret_result} after conversion with sre_ntoh16().
- * Returns 1 on success, 0 if the read failed ({ret_result} untouched).
- */
-static int
-read_i16(FILE *fp, sqd_uint16 *ret_result)
-{
-  sqd_uint16 raw;
-
-  if (fread(&raw, sizeof(sqd_uint16), 1, fp) == 1) {
-    *ret_result = sre_ntoh16(raw);
-    return 1;
-  }
-  return 0;
-}
-/* write_i16(): convert {n} with sre_hton16() and write the resulting
- * 16-bit value to {fp}. Returns 1 on success, 0 if the write failed.
- */
-static int
-write_i16(FILE *fp, sqd_uint16 n)
-{
-  sqd_uint16 wire = sre_hton16(n);
-
-  return (fwrite(&wire, sizeof(sqd_uint16), 1, fp) == 1) ? 1 : 0;
-}
-/* read_i32(): read one 32-bit value from {fp} and store it in
- * {ret_result} after conversion with sre_ntoh32().
- * Returns 1 on success, 0 if the read failed ({ret_result} untouched).
- */
-static int
-read_i32(FILE *fp, sqd_uint32 *ret_result)
-{
-  sqd_uint32 raw;
-
-  if (fread(&raw, sizeof(sqd_uint32), 1, fp) == 1) {
-    *ret_result = sre_ntoh32(raw);
-    return 1;
-  }
-  return 0;
-}
-/* write_i32(): convert {n} with sre_hton32() and write the resulting
- * 32-bit value to {fp}. Returns 1 on success, 0 if the write failed.
- */
-static int
-write_i32(FILE *fp, sqd_uint32 n)
-{
-  sqd_uint32 wire = sre_hton32(n);
-
-  return (fwrite(&wire, sizeof(sqd_uint32), 1, fp) == 1) ? 1 : 0;
-}
-/* read_i64(): read one 64-bit value from {fp} and store it in
- * {ret_result} after conversion with sre_ntoh64().
- * Returns 1 on success, 0 if the read failed ({ret_result} untouched).
- */
-static int
-read_i64(FILE *fp, sqd_uint64 *ret_result)
-{
-  sqd_uint64 raw;
-
-  if (fread(&raw, sizeof(sqd_uint64), 1, fp) == 1) {
-    *ret_result = sre_ntoh64(raw);
-    return 1;
-  }
-  return 0;
-}
-/* write_i64(): convert {n} with sre_hton64() and write the resulting
- * 64-bit value to {fp}. Returns 1 on success, 0 if the write failed.
- */
-static int
-write_i64(FILE *fp, sqd_uint64 n)
-{
-  sqd_uint64 wire = sre_hton64(n);
-
-  return (fwrite(&wire, sizeof(sqd_uint64), 1, fp) == 1) ? 1 : 0;
-}
-/* read_offset(): read a file offset from {fp} into {ret_offset}, as a
- * 32-bit value if {mode} is SSI_OFFSET_I32 or a 64-bit value if it is
- * SSI_OFFSET_I64. {ret_offset}->mode is set to {mode} before the read
- * is attempted. Returns 1 on success; 0 if the read failed or {mode}
- * is neither of the two (in which case {ret_offset} is untouched).
- */
-static int
-read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset)
-{
-  if (mode != SSI_OFFSET_I32 && mode != SSI_OFFSET_I64) return 0;
-
-  ret_offset->mode = mode;
-  if (mode == SSI_OFFSET_I32)
-    return read_i32(fp, &(ret_offset->off.i32)) ? 1 : 0;
-  return read_i64(fp, &(ret_offset->off.i64)) ? 1 : 0;
-}
-/* write_offset(): write {offset} to {fp} in the width its own mode
- * field calls for (32-bit for SSI_OFFSET_I32, 64-bit for
- * SSI_OFFSET_I64). Returns what write_i32()/write_i64() returns.
- * Any other mode calls abort().
- */
-static int
-write_offset(FILE *fp, SSIOFFSET *offset)
-{
-  if (offset->mode == SSI_OFFSET_I64) return write_i64(fp, offset->off.i64);
-  if (offset->mode != SSI_OFFSET_I32) abort();
-  return write_i32(fp, offset->off.i32);
-}
-
-/* parse_pkey_info(): split one line {buf} of the primary-key temporary
- * file into the fields of {pkey}. Fields are separated by tab/newline
- * and appear in the order: key, file number, record offset, data
- * offset, sequence length. Offsets are parsed as 32-bit when {mode} is
- * SSI_OFFSET_I32 and as 64-bit otherwise.
- *
- * {pkey}->key is set to the first token handed back by sre_strtok();
- * no copy of the key is made here.
- *
- * Returns 0 on success; SSI_ERR_BADFORMAT if a field is missing;
- * SSI_ERR_NO64BIT if 64-bit offsets are requested on a build without
- * HAS_64BIT_FILE_OFFSETS.
- */
-static int
-parse_pkey_info(char *buf, char mode, struct ssipkey_s *pkey)
-{
-  char *cursor = buf;
-  char *field;
-  int   fieldlen;
-
-  /* field 1: the key */
-  field = sre_strtok(&cursor, "\t\n", &fieldlen);
-  if (field == NULL) return SSI_ERR_BADFORMAT;
-  pkey->key = field;
-
-  /* field 2: file number */
-  field = sre_strtok(&cursor, "\t\n", &fieldlen);
-  if (field == NULL) return SSI_ERR_BADFORMAT;
-  pkey->fnum = (sqd_uint16) atoi(field);
-
-  /* fields 3 and 4: record offset, then data offset */
-  if (mode != SSI_OFFSET_I32) {
-#ifdef HAS_64BIT_FILE_OFFSETS
-    field = sre_strtok(&cursor, "\t\n", &fieldlen);
-    if (field == NULL) return SSI_ERR_BADFORMAT;
-    pkey->r_off.mode    = mode;
-    pkey->r_off.off.i64 = (sqd_uint64) strtoull(field, NULL, 10);
-
-    field = sre_strtok(&cursor, "\t\n", &fieldlen);
-    if (field == NULL) return SSI_ERR_BADFORMAT;
-    pkey->d_off.mode    = mode;
-    pkey->d_off.off.i64 = (sqd_uint64) strtoull(field, NULL, 10);
-#else
-    return SSI_ERR_NO64BIT;
-#endif
-  } else {
-    field = sre_strtok(&cursor, "\t\n", &fieldlen);
-    if (field == NULL) return SSI_ERR_BADFORMAT;
-    pkey->r_off.mode    = mode;
-    pkey->r_off.off.i32 = (sqd_uint32) strtoul(field, NULL, 10);
-
-    field = sre_strtok(&cursor, "\t\n", &fieldlen);
-    if (field == NULL) return SSI_ERR_BADFORMAT;
-    pkey->d_off.mode    = mode;
-    pkey->d_off.off.i32 = (sqd_uint32) strtoul(field, NULL, 10);
-  }
-
-  /* field 5: sequence length */
-  field = sre_strtok(&cursor, "\t\n", &fieldlen);
-  if (field == NULL) return SSI_ERR_BADFORMAT;
-  pkey->len = (sqd_uint32) strtoul(field, NULL, 10);
-
-  return 0;
-}
-/* parse_skey_info(): split one line {buf} of the secondary-key
- * temporary file into {skey}: first tab/newline-separated token is the
- * secondary key, second is the primary key it refers to. Both fields
- * of {skey} are set to the tokens handed back by sre_strtok(); no
- * copies are made here.
- *
- * Returns 0 on success; SSI_ERR_BADFORMAT if either field is missing.
- */
-static int
-parse_skey_info(char *buf, struct ssiskey_s *skey)
-{
-  char *cursor = buf;
-  char *field;
-  int   fieldlen;
-
-  field = sre_strtok(&cursor, "\t\n", &fieldlen);
-  if (field == NULL) return SSI_ERR_BADFORMAT;
-  skey->key = field;
-
-  field = sre_strtok(&cursor, "\t\n", &fieldlen);
-  if (field == NULL) return SSI_ERR_BADFORMAT;
-  skey->pkey = field;
-
-  return 0;
-}
-
-/* Function: binary_search()
- * Date:     SRE, Sun Dec 31 16:05:03 2000 [St. Louis]
- *
- * Purpose:  Locate {key} among the fixed-size key records of an open
- *           SSI index by bisection. The records must be sorted in
- *           strcmp() order. Each probe seeks to a record and reads
- *           its first {klen} bytes as the record's key; on a match
- *           the stream is therefore left just past the key text,
- *           ready for the caller to read the rest of that record.
- *
- * Args:     sfp     - an open SSIFILE
- *           key     - key to find
- *           klen    - width of the key field in bytes (plen or slen from sfp)
- *           base    - offset of record 0 (poffset or soffset)
- *           recsize - size of each key record in bytes (precsize or srecsize)
- *           maxidx  - # of keys (nprimary or nsecondary)
- *
- * Returns:  0 on success, with the file positioned for reading the
- *           remaining data for the key.
- *           Nonzero on failure:
- *             SSI_ERR_NO_SUCH_KEY - that key's not in the index
- *             SSI_ERR_MALLOC      - a memory allocation failure
- *             SSI_ERR_NODATA      - an fread() failed
- *             or whatever indexfile_position() returned on failure.
- */
-static int
-binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base,
-              sqd_uint32 recsize, sqd_uint32 maxidx)
-{
-  char       *probe;            /* key text read from the current record */
-  sqd_uint32  lo, hi, mid;
-  int         order;
-  int         status;
-
-  if (maxidx == 0) return SSI_ERR_NO_SUCH_KEY;  /* special case: empty index */
-  if ((probe = malloc (sizeof(char)*klen)) == NULL) return SSI_ERR_MALLOC;
-
-  lo = 0;
-  hi = maxidx-1;
-  for (;;) {
-    mid = (lo+hi) / 2;          /* careful here. only works because
-                                   we limit unsigned vars to signed ranges. */
-    if ((status = indexfile_position(sfp, base, recsize, mid)) != 0) break;
-    if (fread(probe, sizeof(char), klen, sfp->fp) != klen) {
-      status = SSI_ERR_NODATA;
-      break;
-    }
-
-    order = strcmp(probe, key);
-    if (order == 0) break;      /* found it; status is already 0 */
-    if (lo >= hi) {             /* interval exhausted: not present */
-      status = SSI_ERR_NO_SUCH_KEY;
-      break;
-    }
-    if (order < 0) {
-      lo = mid+1;               /* key sorts after record mid */
-    } else {
-      if (mid == 0) {           /* can't step below record 0 (unsigned) */
-        status = SSI_ERR_NO_SUCH_KEY;
-        break;
-      }
-      hi = mid-1;               /* key sorts before record mid */
-    }
-  }
-
-  free(probe);
-  return status;                /* on 0, sfp->fp is positioned... */
-}
-
-/* Function: indexfile_position()
- * Date: SRE, Mon Jan 1 19:32:49 2001 [St. Louis]
- *
- * Purpose: Position the open index file {sfp} at the start
- * of record {n} in a list of records that starts at
- * base offset {base}, where each record takes up {l}
- * bytes. (e.g. the position is byte (base + n*l)).
- *
- * Args: sfp - open SSIFILE
- * base - offset of record 0 (e.g. sfp->foffset)
- * len - size of each record in bytes (e.g. sfp->frecsize)
- * n - which record to get (e.g. 0..sfp->nfiles)
- *
- * Returns: 0 on success, non-zero on failure.
- */
-static int
-indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, sqd_uint32 n)
-{
- SSIOFFSET pos;
- int status;
-
- if (base->mode == SSI_OFFSET_I32) {
- pos.mode = SSI_OFFSET_I32;
- pos.off.i32 = base->off.i32 + n*len;
- } else if (base->mode == SSI_OFFSET_I64) {
- pos.mode = SSI_OFFSET_I64;
- pos.off.i64 = base->off.i64 + n*len;
- } else return 0;
- if ((status = SSISetFilePosition(sfp->fp, &pos)) != 0) return status;
- return 0;
-}
-
-/* Function: current_index_size()
- * Date: SRE, Tue Feb 20 18:23:30 2001 [St. Louis]
- *
- * Purpose: Calculates the size of the current index,
- * in megabytes.
- */
-static sqd_uint64
-current_index_size(SSIINDEX *g)
-{
- sqd_uint64 frecsize, precsize, srecsize;
- sqd_uint64 total;
-
- /* Magic-looking numbers come from adding up sizes
- * of things in bytes
- */
- frecsize = 16 + g->flen;
- precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen;
- srecsize = g->plen+g->slen;
- total = (66L + /* header size, if 64bit index offsets */
- frecsize * g->nfiles + /* file section size */
- precsize * g->nprimary + /* primary key section size */
- srecsize * g->nsecondary) / /* secondary key section size */
- 1048576L;
- return total;
-}
-/* Function: activate_external_sort()
- * Date: SRE, Mon Feb 4 09:08:08 2002 [St. Louis]
- *
- * Purpose: Switch to external sort mode.
- * Open file handles for external index files (ptmp, stmp).
- * Flush current index information to these files.
- * Free current memory, turn over control to the tmpfiles.
- *
- * Return: 0 on success; non-zero on failure.
- */
-static int
-activate_external_sort(SSIINDEX *g)
-{
- int i;
- /* it's a bit late to be checking this, but... */
- if (g->external) return 0; /* we already are external, fool */
- if (FileExists(g->ptmpfile)) return 1;
- if (FileExists(g->stmpfile)) return 1;
- if ((g->ptmp = fopen(g->ptmpfile, "w")) == NULL) return 1;
- if ((g->stmp = fopen(g->stmpfile, "w")) == NULL) return 1;
-
- /* Flush the current indices.
- */
- SQD_DPRINTF1(("Switching to external sort - flushing ssiindex to disk...\n"));
- for (i = 0; i < g->nprimary; i++) {
- if (g->smode == SSI_OFFSET_I32) {
- fprintf(g->ptmp, "%s\t%u\t%lu\t%lu\t%lu\n",
- g->pkeys[i].key, g->pkeys[i].fnum,
- (unsigned long) g->pkeys[i].r_off.off.i32,
- (unsigned long) g->pkeys[i].d_off.off.i32,
- (unsigned long) g->pkeys[i].len);
- } else {
- fprintf(g->ptmp, "%s\t%u\t%llu\t%llu\t%lu\n",
- g->pkeys[i].key, g->pkeys[i].fnum,
- (unsigned long long) g->pkeys[i].r_off.off.i64,
- (unsigned long long) g->pkeys[i].d_off.off.i64,
- (unsigned long) g->pkeys[i].len);
- }
- }
- for (i = 0; i < g->nsecondary; i++)
- fprintf(g->stmp, "%s\t%s\n", g->skeys[i].key, g->skeys[i].pkey);
-
- /* Free the memory now that we've flushed our lists to disk
- */
- for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key);
- for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key);
- for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey);
- if (g->pkeys != NULL) free(g->pkeys);
- if (g->skeys != NULL) free(g->skeys);
- g->pkeys = NULL;
- g->skeys = NULL;
-
- /* Turn control over to external accumulation mode.
- */
- g->external = TRUE;
- return 0;
-}
-
-
-/*****************************************************************
- * Debugging API
- *****************************************************************/
-void
-SSIForceExternalSort(SSIINDEX *g)
-{
- if (activate_external_sort(g) != 0)
- Die("failed to turn external sorting on.");
-}
-
-
-/*****************************************************************
- * Test driving mode
- *****************************************************************/
-#ifdef MUGGINS_LETS_ME_SLEEP
-/* Minimally:
- cc -g -Wall -o shiva -DDEBUGLEVEL=1 -DMUGGINS_LETS_ME_SLEEP ssi.c sqerror.c sre_string.c types.c sre_ctype.c sre_math.c file.c -lm
-*/
-
-int
-main(int argc, char **argv)
-{
- char name[32], accession[32];
- SSIINDEX *ssi;
- int mode;
- SSIOFFSET r_off, d_off;
- FILE *ofp;
- int i;
- int fh; /* a file handle */
- int status; /* return status from a SSI call */
-
- mode = SSI_OFFSET_I32;
- if ((ssi = SSICreateIndex(mode)) == NULL)
- Die("Failed to allocate SSI index");
-
- /* Generate two FASTA files, tmp.0 and tmp.1, and index them.
- */
- if ((ofp = fopen("tmp.0", "w")) == NULL)
- Die("failed to open tmp.0");
- if ((status = SSIAddFileToIndex(ssi, "tmp.0", SQFILE_FASTA, &fh)) != 0)
- Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status));
- for (i = 0; i < 10; i++) {
- if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0)
- Die("SSIGetFilePosition() failed: %s", SSIErrorString(status));
- sprintf(name, "seq%d", i);
- sprintf(accession, "ac%d", i);
- fprintf(ofp, ">%s [%s] Description? we don't need no steenking description.\n",
- name, accession);
- if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0)
- Die("SSIGetFilePosition() failed: %s", SSIErrorString(status));
- fprintf(ofp, "AAAAAAAAAA\n");
- fprintf(ofp, "CCCCCCCCCC\n");
- fprintf(ofp, "GGGGGGGGGG\n");
- fprintf(ofp, "TTTTTTTTTT\n");
-
- if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0)
- Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status));
- if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0)
- Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status));
- }
- SSISetFileForSubseq(ssi, fh, 11, 10);
- fclose(ofp);
-
- if ((ofp = fopen("tmp.1", "w")) == NULL)
- Die("failed to open tmp.1");
- if ((status = SSIAddFileToIndex(ssi, "tmp.1", SQFILE_FASTA, &fh)) != 0)
- Die("SSIAddFileToIndex() failed: %s", SSIErrorString(status));
- for (i = 10; i < 20; i++) {
- if ((status = SSIGetFilePosition(ofp, mode, &r_off)) != 0)
- Die("SSIGetFilePosition() failed: %s", SSIErrorString(status));
- sprintf(name, "seq%d", i);
- sprintf(accession, "ac%d", i);
- fprintf(ofp, ">%s [%s] i/o, i/o, it's off to disk we go.\n",
- name, accession);
- if ((status = SSIGetFilePosition(ofp, mode, &d_off)) != 0)
- Die("SSIGetFilePosition() failed: %s", SSIErrorString(status));
- fprintf(ofp, "AAAAAAAAAA 10\n");
- fprintf(ofp, "CCCCCCCCCC 20\n");
- fprintf(ofp, "GGGGGGGGGG 30\n");
- fprintf(ofp, "TTTTTTTTTT 40\n");
-
- if ((status = SSIAddPrimaryKeyToIndex(ssi, name, fh, &r_off, &d_off, 40)) != 0)
- Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(status));
- if ((status = SSIAddSecondaryKeyToIndex(ssi, accession, name)) != 0)
- Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(status));
- }
- SSISetFileForSubseq(ssi, fh, 14, 10);
- fclose(ofp);
-
- /* Write the index to tmp.ssi
- */
- if ((status = SSIWriteIndex("tmp.ssi", ssi)) != 0)
- Die("SSIWriteIndex() failed: %s", SSIErrorString(status));
- SSIFreeIndex(ssi);
-
- /* Now reopen the index and run some tests.
- */
- exit(0);
-}
-
-
-#endif /* test driving code */
-
-
-
diff --git a/squid/ssi.h b/squid/ssi.h
deleted file mode 100644
index 8489e30..0000000
--- a/squid/ssi.h
+++ /dev/null
@@ -1,191 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-#ifndef SSIH_INCLUDED
-#define SSIH_INCLUDED
-
-/* ssi.h
- * Database indexing (SSI format support)
- * CVS $Id: ssi.h,v 1.8 2003/04/14 16:00:16 eddy Exp $
- *
- * See: ssi_format.tex in Docs/
- */
-
-#include "squidconf.h"
-#include <stdio.h>
-#include "squid.h"
-
-/* Limits
- */
-#define SSI_MAXFILES 32767 /* 2^15-1 */
-#define SSI_MAXKEYS 2147483647L /* 2^31-1 */
-#define SSI_MAXRAM 200 /* allow 200MB indexes before external sort mode */
-
-/* typedef: SSIOFFSET
- * Use the union to save space, since the two offset types are
- * mutually exclusive, controlled by "mode"
- */
-struct ssioffset_s {
- char mode; /* GSI_OFFSET_I32, for example */
- union {
- sqd_uint32 i32; /* an offset that fseek() can use */
- sqd_uint64 i64; /* an offset that e.g. fseeko64() can use */
- } off;
-};
-typedef struct ssioffset_s SSIOFFSET;
-#define SSI_OFFSET_I32 0
-#define SSI_OFFSET_I64 1
-
-/* Structure: SSIFILE
- * xref: SSI API documentation in ssi-format.tex
- */
-struct ssifile_s {
- FILE *fp; /* open SSI index file */
- sqd_uint32 flags; /* optional behavior flags */
- sqd_uint16 nfiles; /* number of files = 16 bit int */
- sqd_uint32 nprimary; /* number of primary keys */
- sqd_uint32 nsecondary; /* number of secondary keys */
- sqd_uint32 flen; /* length of filenames (inc '\0') */
- sqd_uint32 plen; /* length of primary keys (inc '\0') */
- sqd_uint32 slen; /* length of secondary keys (inc '\0') */
- sqd_uint32 frecsize; /* # bytes in a file record */
- sqd_uint32 precsize; /* # bytes in a primary key record */
- sqd_uint32 srecsize; /* # bytes in a secondary key record */
- SSIOFFSET foffset; /* disk offset, start of file records */
- SSIOFFSET poffset; /* disk offset, start of pri key recs */
- SSIOFFSET soffset; /* disk offset, start of sec key recs */
-
- char imode; /* mode for index file offsets, 32 v. 64 bit */
- char smode; /* mode for sequence file offsets, 32 v. 64 bit */
-
- /* File information:
- */
- char **filename; /* list of file names [0..nfiles-1] */
- sqd_uint32 *fileformat; /* file formats */
- sqd_uint32 *fileflags; /* optional per-file behavior flags */
- sqd_uint32 *bpl; /* bytes per line in file */
- sqd_uint32 *rpl; /* residues per line in file */
-};
-typedef struct ssifile_s SSIFILE;
-
-/* optional per-index behavior flags in SSIFILE structure's flags:
- */
-#define SSI_USE64 1<<0 /* seq offsets are 64-bit */
-#define SSI_USE64_INDEX 1<<1 /* index file offsets are 64-bit */
-
-/* optional per-file behavior flags in fileflags
- */
-#define SSI_FAST_SUBSEQ 1<<0 /* can do subseq lookup in this file */
-
-/* Structure: SSIINDEX
- *
- * Used when building up an index and writing it to disk
- */
-struct ssipkey_s { /* Primary key data: */
- char *key; /* key name */
- sqd_uint16 fnum; /* file number */
- SSIOFFSET r_off; /* record offset */
- SSIOFFSET d_off; /* data offset */
- sqd_uint32 len; /* sequence length */
-};
-struct ssiskey_s { /* Secondary key data: */
- char *key; /* secondary key name */
- char *pkey; /* primary key name */
-};
-struct ssiindex_s {
- int smode; /* sequence mode: SSI_OFFSET_I32 or _I64 */
- int imode; /* index mode: SSI_OFFSET_I32 or _I64 */
- int external; /* TRUE if pkeys and skeys are on disk */
- int max_ram; /* maximum RAM in MB before switching to external */
-
- char **filenames;
- sqd_uint32 *fileformat;
- sqd_uint32 *bpl;
- sqd_uint32 *rpl;
- sqd_uint32 flen; /* length of longest filename, inc '\0' */
- sqd_uint16 nfiles;
-
- struct ssipkey_s *pkeys;
- sqd_uint32 plen; /* length of longest pkey, including '\0' */
- sqd_uint32 nprimary;
- char *ptmpfile; /* name of tmp file, for external sort mode */
- FILE *ptmp; /* handle on open ptmpfile */
-
- struct ssiskey_s *skeys;
- sqd_uint32 slen; /* length of longest skey, including '\0' */
- sqd_uint32 nsecondary;
- char *stmpfile; /* name of tmp file, for external sort mode */
- FILE *stmp; /* handle on open ptmpfile */
-};
-typedef struct ssiindex_s SSIINDEX;
-
-/* These control malloc and realloc chunk sizes in the index
- * construction code.
- */
-#define SSI_FILE_BLOCK 10
-#define SSI_KEY_BLOCK 100
-
-/* Error codes set by the API
- */
-#define SSI_ERR_OK 0
-#define SSI_ERR_NODATA 1 /* no data? an fread() failed */
-#define SSI_ERR_NO_SUCH_KEY 2 /* that key's not in the index */
-#define SSI_ERR_MALLOC 3
-#define SSI_ERR_NOFILE 4 /* no such file? an fopen() failed */
-#define SSI_ERR_BADMAGIC 5 /* magic number mismatch in GSIOpen() */
-#define SSI_ERR_BADFORMAT 6 /* didn't read what I expected to fread() */
-#define SSI_ERR_NO64BIT 7 /* needed 64-bit support and didn't have it */
-#define SSI_ERR_SEEK_FAILED 8 /* an fseek() (or similar) failed */
-#define SSI_ERR_TELL_FAILED 9 /* an ftell() (or similar) failed */
-#define SSI_ERR_NO_SUBSEQS 10 /* fast subseq is disallowed */
-#define SSI_ERR_RANGE 11 /* subseq requested is out of range */
-#define SSI_ERR_BADARG 12 /* something wrong with a function argument */
-#define SSI_ERR_TOOMANY_FILES 13 /* ran out of range for files in an index */
-#define SSI_ERR_TOOMANY_KEYS 14 /* ran out of range for keys in an index */
-#define SSI_ERR_FWRITE 15
-#define SSI_ERR_EXTERNAL_SORT 16 /* external sort failed */
-
-/* The SSI file reading API:
- */
-extern int SSIOpen(char *filename, SSIFILE **ret_sfp);
-extern int SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh,
- SSIOFFSET *ret_offset);
-extern int SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh,
- SSIOFFSET *ret_offset);
-extern int SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start,
- int *ret_fh, SSIOFFSET *record_offset,
- SSIOFFSET *data_offset, int *ret_actual_start);
-extern int SSISetFilePosition(FILE *fp, SSIOFFSET *offset);
-extern int SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format);
-extern void SSIClose(SSIFILE *sfp);
-
-/* The SSI index file writing API:
- */
-extern int SSIRecommendMode(char *file);
-extern SSIINDEX *SSICreateIndex(int mode);
-extern int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset);
-extern int SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh);
-extern int SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl);
-extern int SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh,
- SSIOFFSET *r_off, SSIOFFSET *d_off,
- int L);
-extern int SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey);
-extern int SSIWriteIndex(char *file, SSIINDEX *g);
-extern void SSIFreeIndex(SSIINDEX *g);
-
-/* The SSI misc. functions API:
- */
-extern char *SSIErrorString(int n);
-
-/* The SSI debugging API:
- */
-extern void SSIForceExternalSort(SSIINDEX *g);
-
-#endif /*SSIH_INCLUDED*/
diff --git a/squid/stack.c b/squid/stack.c
deleted file mode 100644
index e3effb4..0000000
--- a/squid/stack.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* stack.c
- * SRE, Thu Mar 3 10:08:48 1994
- *
- * Implementation of generic stack structures.
- * RCS $Id: stack.c,v 1.3 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdlib.h>
-#include "squid.h"
-
-
-/************************************************************
- * intstack_s implementation.
- *
- * Functions: InitIntStack() - returns ptr to new stack
- * PushIntStack() - (void)
- * PopIntStack() - returns 1 on success, 0 if stack empty
- * FreeIntStack() - returns number of elements free'd, or 0 if
- * stack was empty.
- *
- * Implementation of the pushdown stack for storing single
- * integers.
- *************************************************************/
-struct intstack_s *
-InitIntStack(void)
-{
- struct intstack_s *stack;
-
- if ((stack = (struct intstack_s *) malloc (sizeof(struct intstack_s))) == NULL)
- Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
- stack->nxt = NULL;
- return stack;
-}
-void
-PushIntStack(struct intstack_s *stack, int data)
-{
- struct intstack_s *new;
-
- if ((new = (struct intstack_s *) malloc (sizeof(struct intstack_s))) == NULL)
- Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
- new->data = data;
-
- new->nxt = stack->nxt;
- stack->nxt = new;
-}
-
-int
-PopIntStack(struct intstack_s *stack, int *ret_data)
-{
- struct intstack_s *old;
-
- if (stack->nxt == NULL) return 0;
-
- old = stack->nxt;
- stack->nxt = old->nxt;
-
- *ret_data = old->data;
- free(old);
- return 1;
-}
-
-void
-ReverseIntStack(struct intstack_s *stack)
-{
- struct intstack_s *old;
- struct intstack_s *new;
-
- old = stack->nxt;
- stack->nxt = NULL;
- while (old != NULL)
- {
- new = old; /* remove one from top of old stack */
- old = old->nxt;
- new->nxt = stack->nxt; /* push it onto new stack */
- stack->nxt = new;
- }
-}
-
-int
-FreeIntStack( struct intstack_s *stack )
-{
- int data;
- int count = 0;
-
- while (PopIntStack(stack, &data))
- count++;
- free(stack);
- return count;
-}
diff --git a/squid/stockholm.c b/squid/stockholm.c
deleted file mode 100644
index 3e760cb..0000000
--- a/squid/stockholm.c
+++ /dev/null
@@ -1,630 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* stockholm.c
- * SRE, Fri May 28 15:46:41 1999
- *
- * Reading/writing of Stockholm format multiple sequence alignments.
- *
- * example of API:
- *
- * MSA *msa;
- * FILE *fp; -- opened for write with fopen()
- * MSAFILE *afp; -- opened for read with MSAFileOpen()
- *
- * while ((msa = ReadStockholm(afp)) != NULL)
- * {
- * WriteStockholm(fp, msa);
- * MSAFree(msa);
- * }
- *
- * RCS $Id: stockholm.c,v 1.8 2003/04/14 16:00:16 eddy Exp $
- */
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-#include "msa.h"
-
-static int parse_gf(MSA *msa, char *buf);
-static int parse_gs(MSA *msa, char *buf);
-static int parse_gc(MSA *msa, char *buf);
-static int parse_gr(MSA *msa, char *buf);
-static int parse_comment(MSA *msa, char *buf);
-static int parse_sequence(MSA *msa, char *buf);
-static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl);
-
-#ifdef TESTDRIVE_STOCKHOLM
-/*****************************************************************
- * stockholm.c test driver:
- * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm
- *
- */
-int
-main(int argc, char **argv)
-{
- MSAFILE *afp;
- MSA *msa;
- char *file;
-
- file = argv[1];
-
- if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)
- Die("Couldn't open %s\n", file);
-
- while ((msa = ReadStockholm(afp)) != NULL)
- {
- WriteStockholm(stdout, msa);
- MSAFree(msa);
- }
-
- MSAFileClose(afp);
- exit(0);
-}
-/******************************************************************/
-#endif /* testdriver */
-
-
-/* Function: ReadStockholm()
- * Date: SRE, Fri May 21 17:33:10 1999 [St. Louis]
- *
- * Purpose: Parse the next alignment from an open Stockholm
- * format alignment file. Return the alignment, or
- * NULL if there are no more alignments in the file.
- *
- * Args: afp - open alignment file
- *
- * Returns: MSA * - an alignment object.
- * caller responsible for an MSAFree()
- * NULL if no more alignments
- *
- * Diagnostics:
- * Will Die() here with a (potentially) useful message
- * if a parsing error occurs
- */
-MSA *
-ReadStockholm(MSAFILE *afp)
-{
- MSA *msa;
- char *s;
- int status;
-
- if (feof(afp->f)) return NULL;
-
- /* Initialize allocation of the MSA.
- */
- msa = MSAAlloc(10, 0);
-
- /* Check the magic Stockholm header line.
- * We have to skip blank lines here, else we perceive
- * trailing blank lines in a file as a format error when
- * reading in multi-record mode.
- */
- do {
- if ((s = MSAFileGetLine(afp)) == NULL) {
- MSAFree(msa);
- return NULL;
- }
- } while (IsBlankline(s));
-
- if (strncmp(s, "# STOCKHOLM 1.", 14) != 0)
- Die("\
-File %s doesn't appear to be in Stockholm format.\n\
-Assuming there isn't some other problem with your file (it is an\n\
-alignment file, right?), please either:\n\
- a) use the Babelfish format autotranslator option (-B, usually);\n\
- b) specify the file's format with the --informat option; or\n\
- a) reformat the alignment to Stockholm format.\n",
- afp->fname);
-
- /* Read the alignment file one line at a time.
- */
- while ((s = MSAFileGetLine(afp)) != NULL)
- {
- while (*s == ' ' || *s == '\t') s++; /* skip leading whitespace */
-
- if (*s == '#') {
- if (strncmp(s, "#=GF", 4) == 0) status = parse_gf(msa, s);
- else if (strncmp(s, "#=GS", 4) == 0) status = parse_gs(msa, s);
- else if (strncmp(s, "#=GC", 4) == 0) status = parse_gc(msa, s);
- else if (strncmp(s, "#=GR", 4) == 0) status = parse_gr(msa, s);
- else status = parse_comment(msa, s);
- }
- else if (strncmp(s, "//", 2) == 0) break;
- else if (*s == '\n') continue;
- else status = parse_sequence(msa, s);
-
- if (status == 0)
- Die("Stockholm format parse error: line %d of file %s while reading alignment %s",
- afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name);
- }
-
- if (s == NULL && msa->nseq != 0)
- Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name);
-
- if (s == NULL && msa->nseq == 0) {
- /* probably just some junk at end of file */
- MSAFree(msa);
- return NULL;
- }
-
- MSAVerifyParse(msa);
- return msa;
-}
-
-
-/* Function: WriteStockholm()
- * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis]
- *
- * Purpose: Write an alignment in standard multi-block
- * Stockholm format to an open file. A wrapper
- * for actually_write_stockholm().
- *
- * Args: fp - file that's open for writing
- * msa - alignment to write
- *
- * Returns: (void)
- */
-void
-WriteStockholm(FILE *fp, MSA *msa)
-{
- actually_write_stockholm(fp, msa, 50); /* 50 char per block */
-}
-
-/* Function: WriteStockholmOneBlock()
- * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis]
- *
- * Purpose: Write an alignment in Pfam's single-block
- * Stockholm format to an open file. A wrapper
- * for actually_write_stockholm().
- *
- * Args: fp - file that's open for writing
- * msa - alignment to write
- *
- * Returns: (void)
- */
-void
-WriteStockholmOneBlock(FILE *fp, MSA *msa)
-{
- actually_write_stockholm(fp, msa, msa->alen); /* one big block */
-}
-
-
-/* Function: actually_write_stockholm()
- * Date: SRE, Fri May 21 17:39:22 1999 [St. Louis]
- *
- * Purpose: Write an alignment in Stockholm format to
- * an open file. This is the function that actually
- * does the work. The API's WriteStockholm()
- * and WriteStockholmOneBlock() are wrappers.
- *
- * Args: fp - file that's open for writing
- * msa - alignment to write
- * cpl - characters to write per line in alignment block
- *
- * Returns: (void)
- */
-static void
-actually_write_stockholm(FILE *fp, MSA *msa, int cpl)
-{
- int i, j;
- int len = 0;
- int namewidth;
- int typewidth = 0; /* markup tags are up to 5 chars long */
- int markupwidth = 0; /* #=GR, #=GC are four char wide + 1 space */
- char *buf;
- int currpos;
- char *s, *tok;
-
- /* Figure out how much space we need for name + markup
- * to keep the alignment in register. Required by Stockholm
- * spec, even though our Stockholm parser doesn't care (Erik's does).
- */
- namewidth = 0;
- for (i = 0; i < msa->nseq; i++)
- if ((len = strlen(msa->sqname[i])) > namewidth)
- namewidth = len;
-
- /* Figure out how much space we need for markup tags
- * markupwidth = always 4 if we're doing markup: strlen("#=GR")
- * typewidth = longest markup tag
- */
- if (msa->ss != NULL) { markupwidth = 4; typewidth = 2; }
- if (msa->sa != NULL) { markupwidth = 4; typewidth = 2; }
- for (i = 0; i < msa->ngr; i++)
- if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len;
-
- if (msa->rf != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; }
- if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
- if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
- for (i = 0; i < msa->ngc; i++)
- if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len;
-
- buf = MallocOrDie(sizeof(char) * (cpl+namewidth+typewidth+markupwidth+61));
-
- /* Magic Stockholm header
- */
- fprintf(fp, "# STOCKHOLM 1.0\n");
-
- /* Free text comments
- */
- for (i = 0; i < msa->ncomment; i++)
- fprintf(fp, "# %s\n", msa->comment[i]);
- if (msa->ncomment > 0) fprintf(fp, "\n");
-
- /* GF section: per-file annotation
- */
- if (msa->name != NULL) fprintf(fp, "#=GF ID %s\n", msa->name);
- if (msa->acc != NULL) fprintf(fp, "#=GF AC %s\n", msa->acc);
- if (msa->desc != NULL) fprintf(fp, "#=GF DE %s\n", msa->desc);
- if (msa->au != NULL) fprintf(fp, "#=GF AU %s\n", msa->au);
-
- /* Thresholds are hacky. Pfam has two. Rfam has one.
- */
- if (msa->cutoff_is_set[MSA_CUTOFF_GA1] && msa->cutoff_is_set[MSA_CUTOFF_GA2])
- fprintf(fp, "#=GF GA %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_GA1], msa->cutoff[MSA_CUTOFF_GA2]);
- else if (msa->cutoff_is_set[MSA_CUTOFF_GA1])
- fprintf(fp, "#=GF GA %.1f\n", msa->cutoff[MSA_CUTOFF_GA1]);
- if (msa->cutoff_is_set[MSA_CUTOFF_NC1] && msa->cutoff_is_set[MSA_CUTOFF_NC2])
- fprintf(fp, "#=GF NC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_NC1], msa->cutoff[MSA_CUTOFF_NC2]);
- else if (msa->cutoff_is_set[MSA_CUTOFF_NC1])
- fprintf(fp, "#=GF NC %.1f\n", msa->cutoff[MSA_CUTOFF_NC1]);
- if (msa->cutoff_is_set[MSA_CUTOFF_TC1] && msa->cutoff_is_set[MSA_CUTOFF_TC2])
- fprintf(fp, "#=GF TC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_TC1], msa->cutoff[MSA_CUTOFF_TC2]);
- else if (msa->cutoff_is_set[MSA_CUTOFF_TC1])
- fprintf(fp, "#=GF TC %.1f\n", msa->cutoff[MSA_CUTOFF_TC1]);
-
- for (i = 0; i < msa->ngf; i++)
- fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]);
- fprintf(fp, "\n");
-
-
- /* GS section: per-sequence annotation
- */
- if (msa->flags & MSA_SET_WGT)
- {
- for (i = 0; i < msa->nseq; i++)
- fprintf(fp, "#=GS %-*.*s WT %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]);
- fprintf(fp, "\n");
- }
- if (msa->sqacc != NULL)
- {
- for (i = 0; i < msa->nseq; i++)
- if (msa->sqacc[i] != NULL)
- fprintf(fp, "#=GS %-*.*s AC %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]);
- fprintf(fp, "\n");
- }
- if (msa->sqdesc != NULL)
- {
- for (i = 0; i < msa->nseq; i++)
- if (msa->sqdesc[i] != NULL)
- fprintf(fp, "#=GS %*.*s DE %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]);
- fprintf(fp, "\n");
- }
- for (i = 0; i < msa->ngs; i++)
- {
- /* Multiannotated GS tags are possible; for example,
- * #=GS foo DR PDB; 1xxx;
- * #=GS foo DR PDB; 2yyy;
- * These are stored, for example, as:
- * msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;"
- * and must be decomposed.
- */
- for (j = 0; j < msa->nseq; j++)
- if (msa->gs[i][j] != NULL)
- {
- s = msa->gs[i][j];
- while ((tok = sre_strtok(&s, "\n", NULL)) != NULL)
- fprintf(fp, "#=GS %*.*s %5s %s\n", namewidth, namewidth,
- msa->sqname[j], msa->gs_tag[i], tok);
- }
- fprintf(fp, "\n");
- }
-
- /* Alignment section:
- * contains aligned sequence, #=GR annotation, and #=GC annotation
- */
- for (currpos = 0; currpos < msa->alen; currpos += cpl)
- {
- if (currpos > 0) fprintf(fp, "\n");
- for (i = 0; i < msa->nseq; i++)
- {
- strncpy(buf, msa->aseq[i] + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "%-*.*s %s\n", namewidth+typewidth+markupwidth, namewidth+typewidth+markupwidth,
- msa->sqname[i], buf);
-
- if (msa->ss != NULL && msa->ss[i] != NULL) {
- strncpy(buf, msa->ss[i] + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GR %-*.*s SS %s\n", namewidth, namewidth, msa->sqname[i], buf);
- }
- if (msa->sa != NULL && msa->sa[i] != NULL) {
- strncpy(buf, msa->sa[i] + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GR %-*.*s SA %s\n", namewidth, namewidth, msa->sqname[i], buf);
- }
- for (j = 0; j < msa->ngr; j++)
- if (msa->gr[j][i] != NULL) {
- strncpy(buf, msa->gr[j][i] + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GR %-*.*s %5s %s\n",
- namewidth, namewidth, msa->sqname[i], msa->gr_tag[j], buf);
- }
- }
- if (msa->ss_cons != NULL) {
- strncpy(buf, msa->ss_cons + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SS_cons", buf);
- }
-
- if (msa->sa_cons != NULL) {
- strncpy(buf, msa->sa_cons + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SA_cons", buf);
- }
-
- if (msa->rf != NULL) {
- strncpy(buf, msa->rf + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "RF", buf);
- }
- for (j = 0; j < msa->ngc; j++) {
- strncpy(buf, msa->gc[j] + currpos, cpl);
- buf[cpl] = '\0';
- fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth,
- msa->gc_tag[j], buf);
- }
- }
- fprintf(fp, "//\n");
- free(buf);
-}
-
-
-
-
-
-/* Format of a GF line:
- * #=GF <featurename> <text>
- */
-static int
-parse_gf(MSA *msa, char *buf)
-{
- char *gf;
- char *featurename;
- char *text;
- char *s;
-
- s = buf;
- if ((gf = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
- while (*text && (*text == ' ' || *text == '\t')) text++;
-
- if (strcmp(featurename, "ID") == 0)
- msa->name = sre_strdup(text, -1);
- else if (strcmp(featurename, "AC") == 0)
- msa->acc = sre_strdup(text, -1);
- else if (strcmp(featurename, "DE") == 0)
- msa->desc = sre_strdup(text, -1);
- else if (strcmp(featurename, "AU") == 0)
- msa->au = sre_strdup(text, -1);
- else if (strcmp(featurename, "GA") == 0)
- { /* Pfam has GA1, GA2. Rfam just has GA1. */
- s = text;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- msa->cutoff[MSA_CUTOFF_GA1] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
- msa->cutoff[MSA_CUTOFF_GA2] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
- }
- }
- else if (strcmp(featurename, "NC") == 0)
- {
- s = text;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- msa->cutoff[MSA_CUTOFF_NC1] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
- msa->cutoff[MSA_CUTOFF_NC2] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
- }
- }
- else if (strcmp(featurename, "TC") == 0)
- {
- s = text;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- msa->cutoff[MSA_CUTOFF_TC1] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
- if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
- msa->cutoff[MSA_CUTOFF_TC2] = atof(text);
- msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
- }
- }
- else
- MSAAddGF(msa, featurename, text);
-
- return 1;
-}
-
-
-/* Format of a GS line:
- * #=GS <seqname> <featurename> <text>
- */
-static int
-parse_gs(MSA *msa, char *buf)
-{
- char *gs;
- char *seqname;
- char *featurename;
- char *text;
- int seqidx;
- char *s;
-
- s = buf;
- if ((gs = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
- while (*text && (*text == ' ' || *text == '\t')) text++;
-
- /* GS usually follows another GS; guess lastidx+1
- */
- seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
- msa->lastidx = seqidx;
-
- if (strcmp(featurename, "WT") == 0)
- {
- msa->wgt[seqidx] = atof(text);
- msa->flags |= MSA_SET_WGT;
- }
-
- else if (strcmp(featurename, "AC") == 0)
- MSASetSeqAccession(msa, seqidx, text);
-
- else if (strcmp(featurename, "DE") == 0)
- MSASetSeqDescription(msa, seqidx, text);
-
- else
- MSAAddGS(msa, featurename, seqidx, text);
-
- return 1;
-}
-
-/* Format of a GC line:
- * #=GC <featurename> <text>
- */
-static int
-parse_gc(MSA *msa, char *buf)
-{
- char *gc;
- char *featurename;
- char *text;
- char *s;
- int len;
-
- s = buf;
- if ((gc = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
-
- if (strcmp(featurename, "SS_cons") == 0)
- sre_strcat(&(msa->ss_cons), -1, text, len);
- else if (strcmp(featurename, "SA_cons") == 0)
- sre_strcat(&(msa->sa_cons), -1, text, len);
- else if (strcmp(featurename, "RF") == 0)
- sre_strcat(&(msa->rf), -1, text, len);
- else
- MSAAppendGC(msa, featurename, text);
-
- return 1;
-}
-
-/* Format of a GR line:
- * #=GR <seqname> <featurename> <text>
- */
-static int
-parse_gr(MSA *msa, char *buf)
-{
- char *gr;
- char *seqname;
- char *featurename;
- char *text;
- int seqidx;
- int len;
- int j;
- char *s;
-
- s = buf;
- if ((gr = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
-
- /* GR usually follows sequence it refers to; guess msa->lastidx */
- seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx);
- msa->lastidx = seqidx;
-
- if (strcmp(featurename, "SS") == 0)
- {
- if (msa->ss == NULL)
- {
- msa->ss = MallocOrDie(sizeof(char *) * msa->nseqalloc);
- msa->sslen = MallocOrDie(sizeof(int) * msa->nseqalloc);
- for (j = 0; j < msa->nseqalloc; j++)
- {
- msa->ss[j] = NULL;
- msa->sslen[j] = 0;
- }
- }
- msa->sslen[seqidx] = sre_strcat(&(msa->ss[seqidx]), msa->sslen[seqidx], text, len);
- }
- else if (strcmp(featurename, "SA") == 0)
- {
- if (msa->sa == NULL)
- {
- msa->sa = MallocOrDie(sizeof(char *) * msa->nseqalloc);
- msa->salen = MallocOrDie(sizeof(int) * msa->nseqalloc);
- for (j = 0; j < msa->nseqalloc; j++)
- {
- msa->sa[j] = NULL;
- msa->salen[j] = 0;
- }
- }
- msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len);
- }
- else
- MSAAppendGR(msa, featurename, seqidx, text);
-
- return 1;
-}
-
-
-/* comments are simply stored verbatim, not parsed
- */
-static int
-parse_comment(MSA *msa, char *buf)
-{
- char *s;
- char *comment;
-
- s = buf + 1; /* skip leading '#' */
- if (*s == '\n') { *s = '\0'; comment = s; } /* deal with blank comment */
- else if ((comment = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
-
- MSAAddComment(msa, comment);
- return 1;
-}
-
-static int
-parse_sequence(MSA *msa, char *buf)
-{
- char *s;
- char *seqname;
- char *text;
- int seqidx;
- int len;
-
- s = buf;
- if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
- if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
-
- /* seq usually follows another seq; guess msa->lastidx +1 */
- seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
- msa->lastidx = seqidx;
-
- msa->sqlen[seqidx] = sre_strcat(&(msa->aseq[seqidx]), msa->sqlen[seqidx], text, len);
- return 1;
-}
-
-
-
diff --git a/squid/stockholm.h b/squid/stockholm.h
deleted file mode 100644
index a9cae55..0000000
--- a/squid/stockholm.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#ifndef STOCKHOLM_H_INCLUDED
-#define STOCKHOLM_H_INCLUDED
-
-#include "gki.h"
-
-typedef struct {
- int *linetype; /* e.g. STOCKHOLM_GF_LINE; always valid */
- int *featurecode; /* all markup codes: e.g. STOCKHOLM_GF_ID;
- nonmarkup: always set to STOCKHOLM_UNPARSED */
- char **featurename; /* all unparsed markup codes: string, e.g. "ID";
- all other lines: NULL */
- int *seqidx; /* all GS, GR, GC, sequence lines: which sequence;
- other lines: 0 */
- int *len; /* all GR, GC, sequence lines: length of text field;
- other lines: 0 */
- char **text; /* all unparsed nonblank lines: rest of data
- other lines: NULL */
- int nseqalloc; /* current nseqs allocated for in aseqs and ainfo */
- int nlines; /* number of lines in this skel */
- int nlinealloc; /* current # of lines allocated for in this skel */
- int overall_line; /* line # in file (important in files w/ >1 ali)*/
-} alifile_skeleton;
-
-#define STOCKHOLM_GF_LINE 0
-#define STOCKHOLM_GS_LINE 1
-#define STOCKHOLM_GC_LINE 2
-#define STOCKHOLM_GR_LINE 3
-#define STOCKHOLM_SEQ_LINE 4
-#define STOCKHOLM_BLANK_LINE 5
-#define STOCKHOLM_COMMENT_LINE 6
-
-#define STOCKHOLM_UNPARSED 0
-#define STOCKHOLM_GF_ID 1
-#define STOCKHOLM_GF_AC 2
-#define STOCKHOLM_GF_DE 3
-#define STOCKHOLM_GF_AU 4
-#define STOCKHOLM_GF_GA 5
-#define STOCKHOLM_GF_NC 6
-#define STOCKHOLM_GF_TC 7
-#define STOCKHOLM_GS_WT 100
-#define STOCKHOLM_GS_AC 101
-#define STOCKHOLM_GS_DE 102
-#define STOCKHOLM_GC_CS 200
-#define STOCKHOLM_GC_RF 201
-#define STOCKHOLM_GR_SS 300
-#define STOCKHOLM_GR_SA 301
-
-#define SKEL_NSEQLUMP 10 /* allocate for new seqs in blocks of this size */
-#define SKEL_LUMPSIZE 100 /* allocate for new lines in skel in blocks of this size */
-
-#endif /*STOCKHOLM_H_INCLUDED*/
diff --git a/squid/stopwatch.c b/squid/stopwatch.c
deleted file mode 100644
index 92281a7..0000000
--- a/squid/stopwatch.c
+++ /dev/null
@@ -1,309 +0,0 @@
-/************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- ************************************************************/
-
-/* stopwatch.c
- * SRE, Fri Nov 26 14:54:21 1999 [St. Louis] [HMMER]
- * SRE, Thu Aug 3 08:11:52 2000 [St. Louis] [moved to SQUID]
- *
- * Reporting of cpu/system/elapsed time used by a process.
- * thanks to Warren Gish for assistance.
- *
- * Basic API:
- *
- * Stopwatch_t *w;
- * w = StopwatchCreate();
- *
- * StopwatchStart(w);
- * do_lots_of_stuff;
- * StopwatchStop(w);
- * StopwatchDisplay(stdout, "CPU time: ", w);
- *
- * StopwatchFree(w);
- *
- * Some behavior can be controlled at compile time by #define's:
- *
- * SRE_STRICT_ANSI: By default, stopwatch module assumes that a
- * machine is POSIX-compliant (e.g. has struct tms, sys/times.h,
- * and times()). If compiled with -DSRE_STRICT_ANSI, reverts to
- * pure ANSI C conformant implementation. This simpler system
- * won't report system times, only user and elapsed times.
- *
- * SRE_ENABLE_PVM: If compiled with -DSRE_ENABLE_PVM, the
- * functions StopwatchPVMPack() and StopwatchPVMUnpack()
- * are compiled, providing PVM communications ability.
- *
- * One additional compile-time configuration note:
- * PTHREAD_TIMES_HACK: Linux pthreads, as of RH6.0/glibc-devel-2.1.1-6,
- * appears to interact poorly with times() -- usage times in all
- * but the master thread are lost. A workaround for this bug is
- * to run stopwatches in each worker thread, and accumulate those
- * times back into the master stopwatch using StopwatchInclude().
- * (Just like a PVM implementation has to do.) In HMMER, this
- * behavior is compiled in with -DPTHREAD_TIMES_HACK. No
- * changes are made in stopwatch functions themselves, though;
- * all the extra code is HMMER code. See hmmcalibrate.c for
- * an example.
- *
- * See hmmcalibrate.c for examples of more complex usage
- * in dealing with pthreads and PVM.
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#ifdef SRE_ENABLE_PVM
-#include <pvm3.h>
-#endif
-
-#include "stopwatch.h"
-
-/* Function: format_time_string()
- * Date: SRE, Fri Nov 26 15:06:28 1999 [St. Louis]
- *
- * Purpose: Given a number of seconds, format into
- * hh:mm:ss.xx in a provided buffer.
- *
- * Args: buf - allocated space (128 is plenty!)
- * sec - number of seconds
- * do_frac - TRUE (1) to include hundredths of a sec
- */
-static void
-format_time_string(char *buf, double sec, int do_frac)
-{
- int h, m, s, hs;
-
- h = (int) (sec / 3600.);
- m = (int) (sec / 60.) - h * 60;
- s = (int) (sec) - h * 3600 - m * 60;
- if (do_frac) {
- hs = (int) (sec * 100.) - h * 360000 - m * 6000 - s * 100;
- sprintf(buf, "%02d:%02d:%02d.%02d", h,m,s,hs);
- } else {
- sprintf(buf, "%02d:%02d:%02d", h,m,s);
- }
-}
-
-/* Function: StopwatchStart()
- * Date: SRE, Fri Nov 26 15:07:48 1999 [St. Louis]
- *
- * Purpose: Start a stopwatch.
- *
- * Args: w - the watch
- */
-void
-StopwatchStart(Stopwatch_t *w)
-{
- w->t0 = time(NULL);
-#ifdef SRE_STRICT_ANSI
- w->cpu0 = clock();
-#else
- (void) times(&(w->cpu0));
-#endif
-
- w->elapsed = 0.;
- w->user = 0.;
- w->sys = 0.;
-}
-
-/* Function: StopwatchStop()
- * Date: SRE, Fri Nov 26 15:08:16 1999 [St. Louis]
- *
- * Purpose: Stop a stopwatch.
- *
- * The implementation allows "split times":
- * you can stop a watch multiple times, reporting
- * times at multiple points during program
- * execution.
- *
- * Args: w - the watch
- */
-void
-StopwatchStop(Stopwatch_t *w)
-{
- time_t t1;
-#ifdef SRE_STRICT_ANSI
- clock_t cpu1;
-#else
- struct tms cpu1;
- long clk_tck;
-#endif
-
- t1 = time(NULL);
- w->elapsed = difftime(t1, w->t0);
-
-#ifdef SRE_STRICT_ANSI
- cpu1 = clock();
- w->user = (double) (cpu1- w->cpu0) / (double) CLOCKS_PER_SEC;
- w->sys = 0.; /* no way to portably get system time in ANSI C */
-
-#else /* assume we're on a POSIX system by default */
- (void) times(&cpu1);
-
- clk_tck = sysconf(_SC_CLK_TCK);
- w->user = (double) (cpu1.tms_utime + cpu1.tms_cutime -
- w->cpu0.tms_utime - w->cpu0.tms_cutime) /
- (double) clk_tck;
-
- w->sys = (double) (cpu1.tms_stime + cpu1.tms_cstime -
- w->cpu0.tms_stime - w->cpu0.tms_cstime) /
- (double) clk_tck;
-#endif
-}
-
-/* Function: StopwatchInclude()
- * Date: SRE, Fri Nov 26 15:09:34 1999 [St. Louis]
- *
- * Purpose: Merge the cpu and system times from a slave into
- * a master stopwatch. Both watches must be
- * stopped, and should not be stopped again unless
- * You Know What You're Doing.
- *
- * Elapsed time is *not* merged; master is assumed
- * to be keeping track of the wall clock time,
- * and the slave/worker watch is ignored.
- *
- * Used in two cases:
- * 1) PVM; merge in the stopwatch(es) from separate
- * process(es) in a cluster.
- * 2) Threads, for broken pthreads/times() implementations
- * that lose track of cpu times used by spawned
- * threads.
- *
- * Args: w1 - the master stopwatch
- * w2 - the slave/worker watch
- *
- */
-void
-StopwatchInclude(Stopwatch_t *w1, Stopwatch_t *w2)
-{
- w1->user += w2->user;
- w1->sys += w2->sys;
-}
-
-/* Function: StopwatchAlloc(), StopwatchZero(), StopwatchCopy(),
- * StopwatchFree()
- * Date: SRE, Fri Nov 26 15:13:14 1999 [St. Louis]
- *
- * Purpose: The usual creation/manipulation/destruction routines
- * for a stopwatch object.
- */
-Stopwatch_t *
-StopwatchCreate(void)
-{
- Stopwatch_t *w;
- w = malloc(sizeof(Stopwatch_t));
- return w;
-}
-void
-StopwatchZero(Stopwatch_t *w)
-{
- w->elapsed = 0.;
- w->user = 0.;
- w->sys = 0.;
-}
-void
-StopwatchCopy(Stopwatch_t *w1, Stopwatch_t *w2)
-{
- w1->t0 = w2->t0;
-#ifdef SRE_STRICT_ANSI
- w1->cpu0 = w2->cpu0;
-#else
- w1->cpu0.tms_utime = w2->cpu0.tms_utime;
- w1->cpu0.tms_stime = w2->cpu0.tms_stime;
- w1->cpu0.tms_cutime = w2->cpu0.tms_cutime;
- w1->cpu0.tms_cstime = w2->cpu0.tms_cstime;
-#endif
- w1->elapsed = w2->elapsed;
- w1->user = w2->user;
- w1->sys = w2->sys;
-}
-void
-StopwatchFree(Stopwatch_t *w)
-{
- free(w);
-}
-
-
-/* Function: StopwatchDisplay()
- * Date: SRE, Fri Nov 26 15:14:12 1999 [St. Louis]
- *
- * Purpose: Output a usage summary line from a *stopped*
- * stopwatch (the times will reflect the last
- * time StopwatchStop() was called.)
- *
- * For s = "CPU Time: " an example output line is:
- * CPU Time: 142.55u 7.17s 149.72 Elapsed: 00:02:35.00
- *
- * Args: fp - open file for writing (stdout, possibly)
- * s - prefix for the report line
- * w - a (recently stopped) stopwatch
- *
- */
-void
-StopwatchDisplay(FILE *fp, char *s, Stopwatch_t *w)
-{
- char buf[128]; /* (safely holds up to 10^14 years) */
-
- if (s == NULL)
- fputs("CPU Time: ", fp);
- else
- fputs(s, fp);
-
- format_time_string(buf, w->user+w->sys, 1);
-#ifdef SRE_STRICT_ANSI
- fprintf(fp, "%.2fu %s ", w->user, buf);
-#else
- fprintf(fp, "%.2fu %.2fs %s ", w->user, w->sys, buf);
-#endif
-
- format_time_string(buf, w->elapsed, 0);
- fprintf(fp, "Elapsed: %s\n", buf);
-}
-
-#ifdef SRE_ENABLE_PVM
-/* Function: StopwatchPVMPack(), StopwatchPVMUnpack()
- * Date: SRE, Fri Nov 26 15:22:04 1999 [St. Louis]
- *
- * Purpose: Transmission of stopwatch data in a PVM
- * cluster.
- */
-void
-StopwatchPVMPack(Stopwatch_t *w)
-{
- pvm_pkdouble(&(w->elapsed), 1, 1);
- pvm_pkdouble(&(w->user), 1, 1);
- pvm_pkdouble(&(w->sys), 1, 1);
-}
-void
-StopwatchPVMUnpack(Stopwatch_t *w)
-{
- pvm_upkdouble(&(w->elapsed), 1, 1);
- pvm_upkdouble(&(w->user), 1, 1);
- pvm_upkdouble(&(w->sys), 1, 1);
-}
-#endif /*SRE_ENABLE_PVM*/
-
-
-#ifdef TESTDRIVER
-int
-main(int argc, char **argv)
-{
- Stopwatch_t stopwatch;
-
- StopwatchStart(&stopwatch);
-
- sleep(5);
-
- StopwatchStop(&stopwatch);
- StopwatchDisplay(stdout, "CPU Time: ", &stopwatch);
-}
-#endif
diff --git a/squid/stopwatch.h b/squid/stopwatch.h
deleted file mode 100644
index f7e5690..0000000
--- a/squid/stopwatch.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* stopwatch.h
- * SRE, Fri Nov 26 14:54:21 1999 [St. Louis] [HMMER]
- * SRE, Thu Aug 3 08:00:35 2000 [St. Louis] [moved to SQUID]
- * CVS $Id: stopwatch.h,v 1.4 2003/06/13 20:05:31 eddy Exp $
- *
- * Header file for stopwatch.c module:
- * reporting of cpu/system/elapsed time used by a process.
- * See stopwatch.c comments for documentation of compile-time
- * configuration options and API.
- *
- *****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <time.h>
-
-#ifndef SRE_STRICT_ANSI
-#include <sys/times.h>
-#endif
-
-#ifndef STOPWATCH_H_INCLUDED
-#define STOPWATCH_H_INCLUDED
-
-struct stopwatch_s {
- time_t t0; /* Wall clock time, ANSI time() */
-#ifdef SRE_STRICT_ANSI
- clock_t cpu0; /* CPU time, ANSI clock() */
-#else
- struct tms cpu0; /* CPU/system time, POSIX times()*/
-#endif
-
- double elapsed; /* elapsed time, seconds */
- double user; /* CPU time, seconds */
- double sys; /* system time, seconds */
-};
-typedef struct stopwatch_s Stopwatch_t;
-
-extern void StopwatchStart(Stopwatch_t *w);
-extern void StopwatchStop(Stopwatch_t *w);
-extern void StopwatchInclude(Stopwatch_t *w1, Stopwatch_t *w2);
-extern Stopwatch_t *StopwatchCreate(void);
-extern void StopwatchZero(Stopwatch_t *w);
-extern void StopwatchCopy(Stopwatch_t *w1, Stopwatch_t *w2);
-extern void StopwatchFree(Stopwatch_t *w);
-extern void StopwatchDisplay(FILE *fp, char *s, Stopwatch_t *w);
-
-#ifdef SRE_ENABLE_PVM
-extern void StopwatchPVMPack(Stopwatch_t *w);
-extern void StopwatchPVMUnpack(Stopwatch_t *w);
-#endif /* SRE_ENABLE_PVM */
-
-#endif /*STOPWATCH_H_INCLUDED*/
-
diff --git a/squid/test_main.c b/squid/test_main.c
deleted file mode 100644
index 95799fc..0000000
--- a/squid/test_main.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/* Test of the file.c functions
- * cp to ../test_main.c and "make test".
- * Usage: ./test <env> <file>
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "squid.h"
-
-int
-main(int argc, char **argv)
-{
- char *env;
- char *file;
- FILE *fp;
-
- env = argv[1];
- file = argv[2];
-
- fp = EnvFileOpen(file, env);
- if (fp != NULL) printf("File open succeeded\n");
- else printf("File open FAILED\n");
-
- return 0;
-}
diff --git a/squid/translate.c b/squid/translate.c
deleted file mode 100644
index 2020b01..0000000
--- a/squid/translate.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/*
- * translate.c - functions for translating nucleic acid sequence
- * created Tue Jan 12 11:27:29 1993, SRE
- *
- * RCS $Id: translate.c,v 1.3 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <string.h>
-#include "squid.h"
-
-
-
-/* Function: Translate(char *seq, char **code)
- *
- * Given a ptr to the start of a nucleic acid sequence,
- * and a genetic code, translate the sequence into
- * amino acid sequence.
- *
- * code is an array of 65 strings, representing
- * the translations of the 64 codons, arranged
- * in order AAA, AAC, AAG, AAU, ..., UUA, UUC, UUG, UUU.
- * '*' or '***' is used to represent termination
- * codons, usually. The final string, code[64],
- * is the code for an ambiguous amino acid.
- *
- * Because of the way space is allocated for the amino
- * acid sequence, the amino acid strings cannot be
- * longer than 3 letters each. (I don't foresee using
- * anything but the single- and triple- letter codes.)
- *
- * Returns a ptr to the translation string on success,
- * or NULL on failure.
- */
-char *
-Translate(char *seq, char **code)
-{
- int codon; /* index for codon */
- char *aaseq; /* RETURN: the translation */
- char *aaptr; /* ptr into aaseq */
- int i;
-
- if (seq == NULL)
- { squid_errno = SQERR_NODATA; return NULL; }
- if ((aaseq = (char *) calloc (strlen(seq) + 1, sizeof(char))) == NULL)
- Die("calloc failed");
-
- aaptr = aaseq;
- for (; *seq != '\0' && *(seq+1) != '\0' && *(seq+2) != '\0'; seq += 3)
- {
- /* calculate the lookup value for
- this codon */
- codon = 0;
- for (i = 0; i < 3; i++)
- {
- codon *= 4;
- switch (*(seq + i)) {
- case 'A': case 'a': break;
- case 'C': case 'c': codon += 1; break;
- case 'G': case 'g': codon += 2; break;
- case 'T': case 't': codon += 3; break;
- case 'U': case 'u': codon += 3; break;
- default: codon = 64; break;
- }
- if (codon == 64) break;
- }
-
- strcpy(aaptr, code[codon]);
- aaptr += strlen(code[codon]);
- }
- return aaseq;
-}
diff --git a/squid/translate_main.c b/squid/translate_main.c
deleted file mode 100644
index 1ae1fa4..0000000
--- a/squid/translate_main.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* translate_main.c
- *
- * translate - create a file of all possible protein ORFs, given
- * an input nucleic acid sequence
- *
- *
- * Not currently compliant w/ HMMER API.
- *
- * 1.02 Thu Apr 20 16:12:41 1995
- * + incorporated into squid
- * + -a, -s options added
- *
- * CVS $Id: translate_main.c,v 1.7 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "squid.h"
-
-#ifdef NEED_GETOPTH
-#include <getopt.h>
-#endif
-
-#define OPTIONS "ahl:mo:qs:"
-
-static char usage[] = "\
-Usage: translate [-options] <seqfile>\n\
- Translate a nucleic acid sequence into protein ORFs.\n\
- Available options are:\n\
- -a : translate in full, with stops; no individual ORFs\n\
- -h : help; show brief usage and version info\n\
- -l <minlen> : report only ORFs greater than minlen (default 20)\n\
- -m : require ORFs to start with AUG/Met\n\
- -o <outfile> : save results in output file\n\
- -q : quiet; silence banner, for piping or redirection\n\
- -s <stopchar> : with -a, set stop character to <stopchar>\n";
-
-int
-main(int argc, char **argv)
-{
- char *seqfile; /* name of seq file to read */
- SQFILE *seqfp; /* ptr to opened seq file */
- int format; /* format of sequence file */
- char *seq; /* ptr to current sequence */
- SQINFO sqinfo; /* sequence information */
- char *revseq; /* reverse complement of seq */
- int start, end; /* coords of ORF in current seq */
- int orfnumber; /* counter for ORFs in current seq */
- char *aaseq[6]; /* full translations in all 6 frames */
- char *orf; /* ptr to translated ORF sequence */
- char *sptr; /* ptr into orf */
- int len; /* length of an ORF */
- int frame; /* counter for frames (3..5 are reverse)*/
-
- int minimum_len; /* minimum length of ORFs to print out */
- char *outfile; /* file to save output in */
- FILE *ofp; /* where to direct output */
- char stopchar; /* what to use as a stop character */
- int keepstops; /* TRUE to do six big ORFs */
- int quiet; /* TRUE to silence banner */
- int require_met; /* TRUE to start orfs with M */
-
- int optchar; /* option character */
- extern char *optarg; /* for getopt() */
- extern int optind; /* for getopt() */
-
- /***********************************************
- * Parse the command line
- ***********************************************/
-
- format = SQFILE_UNKNOWN; /* autodetect by default */
- minimum_len = 20;
- outfile = NULL;
- stopchar = '*';
- keepstops = FALSE;
- quiet = FALSE;
- require_met = FALSE;
-
- while ((optchar = getopt(argc, argv, OPTIONS)) != -1)
- switch (optchar) {
-
- case 'a': keepstops = TRUE; break;
- case 'l': minimum_len = atoi(optarg); break;
- case 'm': require_met = TRUE; break;
- case 'o': outfile = optarg; break;
- case 'q': quiet = TRUE; break;
- case 's': stopchar = *optarg; break;
-
- case 'h':
- printf("translate %s, %s\n%s\n", SQUID_VERSION, SQUID_DATE, usage);
- exit(EXIT_SUCCESS);
- default:
- Die("%s\n", usage);
- }
-
- if (argc - optind != 1)
- Die("Incorrect number of command line arguments\n%s\n", usage);
-
- seqfile = argv[optind];
-
- /***********************************************
- * Open sequence file and output file
- ***********************************************/
-
- seqfp = SeqfileOpen(seqfile, format, NULL);
- if (seqfp == NULL)
- Die("Failed to open sequence file %s\n%s\n",
- seqfile, usage);
-
- if (outfile != NULL)
- {
- if ((ofp = fopen(outfile, "w")) == NULL)
- Die("Failed to open output file %s\n", outfile);
- }
- else
- ofp = stdout;
-
-
- /***********************************************
- * Main routine
- ***********************************************/
-
- if (! quiet) printf("translate %s, %s\n", SQUID_VERSION, SQUID_DATE);
-
- while (ReadSeq(seqfp, seqfp->format, &seq, &sqinfo))
- {
- s2upper(seq);
- revseq = (char *) malloc (sqinfo.len + 1);
- revcomp(revseq, seq);
- orfnumber = 1;
-
- /* Translate seq in all six frames */
- aaseq[0] = Translate(seq, stdcode1);
- aaseq[1] = Translate(seq + 1, stdcode1);
- aaseq[2] = Translate(seq + 2, stdcode1);
- aaseq[3] = Translate(revseq, stdcode1);
- aaseq[4] = Translate(revseq + 1, stdcode1);
- aaseq[5] = Translate(revseq + 2, stdcode1);
-
-
-
- if (keepstops)
- { /* full translation including stops */
- for (frame = 0; frame < 6; frame++)
- {
- fprintf(ofp, "> %s:%d", sqinfo.name, frame);
- for (sptr = aaseq[frame]; *sptr; sptr++)
- {
- if (*sptr == '*') *sptr = stopchar;
- if (! ((sptr - aaseq[frame]) % 50)) putc('\n', ofp);
- putc((int) *sptr, ofp);
- }
- putc('\n', ofp);
- }
- }
- else
- { /* Print all decent ORF's in FASTA format */
- for (frame = 0; frame < 6; frame++)
- {
- /* initialize strtok on the first ORF;
- termination codons are '*' symbols */
- orf = strtok(aaseq[frame], "*");
- while (orf != NULL && *orf != '\0')
- {
- if (require_met) {
- while (*orf != 'M' && *orf != '\0') orf++;
- }
-
- if (*orf != '\0') {
- len = strlen(orf);
- if (len > minimum_len)
- {
- /* calculate coords */
- start = (orf - aaseq[frame]) * 3 + 1;
- if (frame < 3) start += frame; /* frame corrections */
- else start -= frame-3;
-
- if (frame < 3)
- end = start + len * 3 - 1;
- else
- {
- start = -1 * (start - sqinfo.len - 1);
- end = start - len * 3 + 1;
- }
-
- fprintf(ofp, "> %s.%d length %d, nt %d..%d",
- sqinfo.name,
- orfnumber,
- len,
- start,
- end);
-
- for (sptr = orf; *sptr; sptr++)
- {
- if (! ((sptr - orf) % 50))
- putc('\n', ofp);
- putc((int) *sptr, ofp);
- }
- putc('\n', ofp);
-
- orfnumber++;
- }
- }
- /* pick off next orf */
- orf = strtok(NULL, "*");
-
- }
- }
- }
-
- for (frame = 0; frame < 6; frame++)
- free(aaseq[frame]);
- FreeSequence(seq, &sqinfo);
- free(revseq);
- }
-
- SeqfileClose(seqfp);
-
- /**************************************************
- * Successful return to invocation environment
- **************************************************/
- return 0;
-}
-
diff --git a/squid/types.c b/squid/types.c
deleted file mode 100644
index b36a6d8..0000000
--- a/squid/types.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* file: types.c
- *
- * Finicky type checkers for strings. Return 1 (TRUE) if ok, 0 elsewise.
- * Also, finicky type converters (sre_ntoh32() and friends)
- *
- * CVS $Id: types.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <string.h>
-#include <ctype.h>
-#include "squid.h"
-
-/* Function: IsInt()
- *
- * Returns TRUE if s points to something that atoi() will parse
- * completely and convert to an integer.
- */
-int
-IsInt(char *s)
-{
- int hex = 0;
-
- if (s == NULL) {squid_errno = SQERR_PARAMETER; return 0; }
-
- /* skip whitespace */
- while (isspace((int) (*s))) s++;
- /* skip leading sign */
- if (*s == '-' || *s == '+') s++;
- /* skip leading conversion signals */
- if ((strncmp(s, "0x", 2) == 0 && (int) strlen(s) > 2) ||
- (strncmp(s, "0X", 2) == 0 && (int) strlen(s) > 2))
- {
- s += 2;
- hex = 1;
- }
- else if (*s == '0' && (int) strlen(s) > 1)
- s++;
- /* examine remainder for garbage chars */
- if (!hex)
- while (*s != '\0')
- {
- if (!isdigit((int) (*s))) return 0;
- s++;
- }
- else
- while (*s != '\0')
- {
- if (!isxdigit((int) (*s))) return 0;
- s++;
- }
-
- return 1;
-}
-
-
-/* Function: IsReal()
- *
- * Purpose: Returns TRUE if s is a string representation
- * of a valid floating point number.
- */
-int
-IsReal(char *s)
-{
- int gotdecimal = 0;
- int gotexp = 0;
- int gotreal = 0;
-
- if (s == NULL) return 0;
-
- while (isspace((int) (*s))) s++; /* skip leading whitespace */
- if (*s == '-' || *s == '+') s++; /* skip leading sign */
-
- /* Examine remainder for garbage. Allowed one '.' and
- * one 'e' or 'E'; if both '.' and e/E occur, '.'
- * must be first.
- */
- while (*s != '\0')
- {
- if (isdigit((int) (*s)))
- gotreal++;
- else if (*s == '.')
- {
- if (gotdecimal) return 0; /* can't have two */
- if (gotexp) return 0; /* e/E preceded . */
- else gotdecimal++;
- }
- else if (*s == 'e' || *s == 'E')
- {
- if (gotexp) return 0; /* can't have two */
- else gotexp++;
- }
- else if (isspace((int) (*s)))
- break;
-
- s++;
- }
-
- while (isspace((int) (*s))) s++; /* skip trailing whitespace */
- if (*s == '\0' && gotreal) return 1;
- else return 0;
-}
-
-
-/* Function: Byteswap()
- *
- * Purpose: Swap between big-endian and little-endian.
- * For example:
- * int foo = 0x12345678;
- * byteswap((char *) &foo, sizeof(int));
- * printf("%x\n", foo)
- * gives 78563412.
- *
- * I don't fully understand byte-swapping issues.
- * However, I have tested this on chars through floats,
- * on various machines:
- * SGI IRIX 4.0.5, SunOS 4.1.3, DEC Alpha OSF/1, Alliant
- *
- * Date: Sun Feb 12 10:26:22 1995
- */
-void
-Byteswap(char *swap, int nbytes)
-{
- int x;
- char byte;
-
- for (x = 0; x < nbytes / 2; x++)
- {
- byte = swap[nbytes - x - 1];
- swap[nbytes - x - 1] = swap[x];
- swap[x] = byte;
- }
-}
-
-
-
-/* Functions: sre_ntoh16(), etc.
- * Date: SRE, Sun Dec 31 11:26:53 2000 [St. Louis]
- *
- * Purpose: Provide functionality of ntohs(), etc; extended
- * to 64-bit unsigned ints, and explicitly provided
- * in case a machine doesn't have the ntohs()
- * family.
- *
- * If we're using the host functions,
- * USE_HOST_BYTESWAP_FUNCTIONS was set to 1 in
- * squidconf.h, and we #define'd sre_hton16(x)=hton(x), etc.
- * in squid.h. In doing this, we assumed that the
- * host functions work on 16- and 32-bit unsigned quantities.
- * If for some reason that's not true, set
- * USE_HOST_BYTESWAP_FUNCTIONS to 0.
- */
-#ifndef USE_HOST_BYTESWAP_FUNCTIONS
-sqd_uint16
-sre_ntoh16(sqd_uint16 netshort)
-{
-#ifdef WORDS_BIGENDIAN
- return netshort;
-#else
- Byteswap((char *) &netshort, 2);
- return netshort;
-#endif
-}
-sqd_uint32
-sre_ntoh32(sqd_uint32 netlong)
-{
-#ifdef WORDS_BIGENDIAN
- return netlong;
-#else
- Byteswap((char *) &netlong, 4);
- return netlong;
-#endif
-}
-sqd_uint16
-sre_hton16(sqd_uint16 hostshort)
-{
-#ifdef WORDS_BIGENDIAN
- return hostshort;
-#else
- Byteswap((char *) &hostshort, 2);
- return hostshort;
-#endif
-}
-sqd_uint32
-sre_hton32(sqd_uint32 hostlong)
-{
-#ifdef WORDS_BIGENDIAN
- return hostlong;
-#else
- Byteswap((char *) &hostlong, 4);
- return hostlong;
-#endif
-}
-#endif /*USE_HOST_BYTESWAP_FUNCTIONS*/
-
-sqd_uint64
-sre_ntoh64(sqd_uint64 net_int64)
-{
-#ifdef WORDS_BIGENDIAN
- return net_int64;
-#else
- Byteswap((char *) &net_int64, 8);
- return net_int64;
-#endif
-}
-sqd_uint64
-sre_hton64(sqd_uint64 host_int64)
-{
-#ifdef WORDS_BIGENDIAN
- return host_int64;
-#else
- Byteswap((char *) &host_int64, 8);
- return host_int64;
-#endif
-}
-
-
-
-
diff --git a/squid/vectorops.c b/squid/vectorops.c
deleted file mode 100644
index af32973..0000000
--- a/squid/vectorops.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/* vectorops.c
- * Operations on vectors of floats or doubles.
- *
- * DSet(), FSet() - set all items in vector to value.
- * DScale(), FScale() - multiply all items in vector by scale
- * DSum(), FSum() - return sum of values in vector
- * DAdd(), FAdd() - add vec2 to vec1.
- * DCopy(), FCopy() - set vec1 to be same as vec2.
- * DDot(), FDot() - return dot product of two vectors.
- * DMax(), FMax() - return value of maximum element in vector
- * DMin(), FMin() - return value of minimum element in vector
- * DArgMax(), FArgMax() - return index of maximum element in vector
- * DArgMin(), FArgMin() - return index of minimum element in vector
- *
- * DNorm(), FNorm() - normalize a probability vector of length n.
- * DLog(), FLog() - convert to log probabilities
- * DExp(), FExp() - convert log p's back to probabilities
- * DLogSum(), FLogSum() - given vector of log p's; return log of summed p's.
- *
- * SRE, Tue Oct 1 15:23:25 2002 [St. Louis]
- * CVS $Id: vectorops.c,v 1.4 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdlib.h>
-#include <math.h>
-#include <float.h>
-#include "vectorops.h"
-
-void
-DSet(double *vec, int n, double value)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = value;
-}
-
-void
-FSet(float *vec, int n, float value)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = value;
-}
-
-void
-DScale(double *vec, int n, double scale)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] *= scale;
-}
-
-void
-FScale(float *vec, int n, float scale)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] *= scale;
-}
-
-double
-DSum(double *vec, int n)
-{
- double sum = 0.;
- int x;
- for (x = 0; x < n; x++) sum += vec[x];
- return sum;
-}
-
-float
-FSum(float *vec, int n)
-{
- float sum = 0.;
- int x;
- for (x = 0; x < n; x++) sum += vec[x];
- return sum;
-}
-
-void
-DAdd(double *vec1, double *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x];
-}
-
-void
-FAdd(float *vec1, float *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] += vec2[x];
-}
-
-void
-DCopy(double *vec1, double *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] = vec2[x];
-}
-
-void
-FCopy(float *vec1, float *vec2, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec1[x] = vec2[x];
-}
-
-double
-DDot(double *vec1, double *vec2, int n)
-{
- double result = 0.;
- int x;
- for (x = 0; x < n; x++) result += vec1[x] * vec2[x];
- return result;
-}
-
-float
-FDot(float *vec1, float *vec2, int n)
-{
- float result = 0.;
- int x;
- for (x = 0; x < n; x++) result += vec1[x] * vec2[x];
- return result;
-}
-
-double
-DMax(double *vec, int n)
-{
- int i;
- double best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] > best) best = vec[i];
- return best;
-}
-
-float
-FMax(float *vec, int n)
-{
- int i;
- float best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] > best) best = vec[i];
- return best;
-}
-
-double
-DMin(double *vec, int n)
-{
- int i;
- double best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] < best) best = vec[i];
- return best;
-}
-
-float
-FMin(float *vec, int n)
-{
- int i;
- float best;
-
- best = vec[0];
- for (i = 1; i < n; i++)
- if (vec[i] < best) best = vec[i];
- return best;
-}
-
-int
-DArgMax(double *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] > vec[best]) best = i;
- return best;
-}
-
-int
-FArgMax(float *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] > vec[best]) best = i;
- return best;
-}
-
-int
-DArgMin(double *vec, int n)
-{
- int i;
- int best = 0;
- for (i = 1; i < n; i++)
- if (vec[i] < vec[best]) best = i;
- return best;
-}
-
-int
-FArgMin(float *vec, int n)
-{
- int i;
- int best = 0;
-
- for (i = 1; i < n; i++)
- if (vec[i] < vec[best]) best = i;
- return best;
-}
-
-void
-DNorm(double *vec, int n)
-{
- int x;
- double sum;
-
- sum = DSum(vec, n);
- if (sum != 0.0) for (x = 0; x < n; x++) vec[x] /= sum;
- else for (x = 0; x < n; x++) vec[x] = 1. / (double) n;
-}
-
-void
-FNorm(float *vec, int n)
-{
- int x;
- float sum;
-
- sum = FSum(vec, n);
- if (sum != 0.0) for (x = 0; x < n; x++) vec[x] /= sum;
- else for (x = 0; x < n; x++) vec[x] = 1. / (float) n;
-}
-
-void
-DLog(double *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++)
- if (vec[x] > 0.) vec[x] = log(vec[x]);
- else vec[x] = -DBL_MAX;
-}
-
-void
-FLog(float *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++)
- if (vec[x] > 0.) vec[x] = log(vec[x]);
- else vec[x] = -FLT_MAX;
-}
-
-void
-DExp(double *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = exp(vec[x]);
-}
-
-void
-FExp(float *vec, int n)
-{
- int x;
- for (x = 0; x < n; x++) vec[x] = exp(vec[x]);
-}
-
-double
-DLogSum(double *vec, int n)
-{
- int x;
- double max, sum;
-
- max = DMax(vec, n);
- sum = 0.0;
- for (x = 0; x < n; x++)
- if (vec[x] > max - 50.)
- sum += exp(vec[x] - max);
- sum = log(sum) + max;
- return sum;
-}
-
-float
-FLogSum(float *vec, int n)
-{
- int x;
- float max, sum;
-
- max = FMax(vec, n);
- sum = 0.0;
- for (x = 0; x < n; x++)
- if (vec[x] > max - 50.)
- sum += exp(vec[x] - max);
- sum = log(sum) + max;
- return sum;
-}
-
-
-
diff --git a/squid/vectorops.h b/squid/vectorops.h
deleted file mode 100644
index a887939..0000000
--- a/squid/vectorops.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/* vectorops.h
- * Header file for vectorops.c
- *
- * SRE, Tue Oct 1 15:23:37 2002 [St. Louis]
- * CVS $Id: vectorops.h,v 1.3 2002/10/13 22:29:34 eddy Exp $
- */
-
-extern void DSet(double *vec, int n, double value);
-extern void FSet(float *vec, int n, float value);
-extern void DScale(double *vec, int n, double scale);
-extern void FScale(float *vec, int n, float scale);
-extern double DSum(double *vec, int n);
-extern float FSum(float *vec, int n);
-extern void DAdd(double *vec1, double *vec2, int n);
-extern void FAdd(float *vec1, float *vec2, int n);
-extern void DCopy(double *vec1, double *vec2, int n);
-extern void FCopy(float *vec1, float *vec2, int n);
-extern double DDot(double *vec1, double *vec2, int n);
-extern float FDot(float *vec1, float *vec2, int n);
-extern double DMax(double *vec, int n);
-extern float FMax(float *vec, int n);
-extern double DMin(double *vec, int n);
-extern float FMin(float *vec, int n);
-extern int DArgMax(double *vec, int n);
-extern int FArgMax(float *vec, int n);
-extern int DArgMin(double *vec, int n);
-extern int FArgMin(float *vec, int n);
-extern void DNorm(double *vec, int n);
-extern void FNorm(float *vec, int n);
-extern void DLog(double *vec, int n);
-extern void FLog(float *vec, int n);
-extern void DExp(double *vec, int n);
-extern void FExp(float *vec, int n);
-extern double DLogSum(double *vec, int n);
-extern float FLogSum(float *vec, int n);
-
diff --git a/squid/weight.c b/squid/weight.c
deleted file mode 100644
index 7dff67b..0000000
--- a/squid/weight.c
+++ /dev/null
@@ -1,751 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* weight.c
- * SRE, Thu Mar 3 07:56:01 1994
- *
- * Calculate weights for sequences in an alignment.
- * RCS $Id: weight.c,v 1.11 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <ctype.h>
-#include <string.h>
-#include "squid.h"
-#include "sre_random.h"
-
-static void upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node);
-static void downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt,
- float *fwt, int node);
-static float simple_distance(char *s1, char *s2);
-static int simple_diffmx(char **aseqs,int num, float ***ret_dmx);
-
-/* Function: GSCWeights()
- *
- * Purpose: Use Erik's tree-based algorithm to set weights for
- * sequences in an alignment. upweight() and downweight()
- * are derived from Graeme Mitchison's code.
- *
- * Args: aseq - array of (0..nseq-1) aligned sequences
- * nseq - number of seqs in alignment
- * alen - length of alignment
- * wgt - allocated [0..nseq-1] array of weights to be returned
- *
- * Return: (void)
- * wgt is filled in.
- */
-void
-GSCWeights(char **aseq, int nseq, int alen, float *wgt)
-{
- float **dmx; /* distance (difference) matrix */
- struct phylo_s *tree;
- float *lwt, *rwt; /* weight on left, right of this tree node */
- float *fwt; /* final weight assigned to this node */
- int i;
-
- /* Sanity check first
- */
- if (nseq == 1) { wgt[0] = 1.0; return; }
-
- /* I use a simple fractional difference matrix derived by
- * pairwise identity. Perhaps I should include a Poisson
- * distance correction.
- */
- MakeDiffMx(aseq, nseq, &dmx);
- if (! Cluster(dmx, nseq, CLUSTER_MIN, &tree)) Die("Cluster() failed");
-
- /* Allocations
- */
- lwt = MallocOrDie (sizeof(float) * (2 * nseq - 1));
- rwt = MallocOrDie (sizeof(float) * (2 * nseq - 1));
- fwt = MallocOrDie (sizeof(float) * (2 * nseq - 1));
-
- /* lwt and rwt are the total branch weight to the left and
- * right of a node or sequence. They are 0..2N-2. 0..N-1 are
- * the sequences; these have weight 0. N..2N-2 are the actual
- * tree nodes.
- */
- for (i = 0; i < nseq; i++)
- lwt[i] = rwt[i] = 0.0;
- /* recursively calculate rwt, lwt, starting
- at node nseq (the root) */
- upweight(tree, nseq, lwt, rwt, nseq);
-
- /* recursively distribute weight across the
- tree */
- fwt[nseq] = nseq;
- downweight(tree, nseq, lwt, rwt, fwt, nseq);
- /* collect the weights */
- for (i = 0; i < nseq; i++)
- wgt[i] = fwt[i];
-
- FMX2Free(dmx);
- FreePhylo(tree, nseq);
- free(lwt); free(rwt); free(fwt);
-}
-
-static void
-upweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, int node)
-{
- int ld,rd;
-
- ld = tree[node-nseq].left;
- if (ld >= nseq) upweight(tree, nseq, lwt, rwt, ld);
- rd = tree[node-nseq].right;
- if (rd >= nseq) upweight(tree, nseq, lwt, rwt, rd);
- lwt[node] = lwt[ld] + rwt[ld] + tree[node-nseq].lblen;
- rwt[node] = lwt[rd] + rwt[rd] + tree[node-nseq].rblen;
-}
-
-
-static void
-downweight(struct phylo_s *tree, int nseq, float *lwt, float *rwt, float *fwt, int node)
-{
- int ld,rd;
- float lnum, rnum;
-
- ld = tree[node-nseq].left;
- rd = tree[node-nseq].right;
- if (lwt[node] + rwt[node] > 0.0)
- {
- fwt[ld] = fwt[node] * (lwt[node] / (lwt[node] + rwt[node]));
- fwt[rd] = fwt[node] * (rwt[node] / (lwt[node] + rwt[node]));
- }
- else
- {
- lnum = (ld >= nseq) ? tree[ld-nseq].incnum : 1.0;
- rnum = (rd >= nseq) ? tree[rd-nseq].incnum : 1.0;
- fwt[ld] = fwt[node] * lnum / (lnum + rnum);
- fwt[rd] = fwt[node] * rnum / (lnum + rnum);
- }
-
- if (ld >= nseq) downweight(tree, nseq, lwt, rwt, fwt, ld);
- if (rd >= nseq) downweight(tree, nseq, lwt, rwt, fwt, rd);
-}
-
-
-
-
-/* Function: VoronoiWeights()
- *
- * Purpose: Calculate weights using the scheme of Sibbald &
- * Argos (JMB 216:813-818 1990). The scheme is
- * slightly modified because the original algorithm
- * actually doesn't work on gapped alignments.
- * The sequences are assumed to be protein.
- *
- * Args: aseq - array of (0..nseq-1) aligned sequences
- * nseq - number of sequences
- * alen - length of alignment
- * wgt - allocated [0..nseq-1] array of weights to be returned
- *
- * Return: void
- * wgt is filled in.
- */
-void
-VoronoiWeights(char **aseq, int nseq, int alen, float *wgt)
-{
- float **dmx; /* distance (difference) matrix */
- float *halfmin; /* 1/2 minimum distance to other seqs */
- char **psym; /* symbols seen in each column */
- int *nsym; /* # syms seen in each column */
- int symseen[27]; /* flags for observed syms */
- char *randseq; /* randomly generated sequence */
- int acol; /* pos in aligned columns */
- int idx; /* index in sequences */
- int symidx; /* 0..25 index for symbol */
- int i; /* generic counter */
- float min; /* minimum distance */
- float dist; /* distance between random and real */
- float challenge, champion; /* for resolving ties */
- int itscale; /* how many iterations per seq */
- int iteration;
- int best; /* index of nearest real sequence */
-
- /* Sanity check first
- */
- if (nseq == 1) { wgt[0] = 1.0; return; }
-
- itscale = 50;
-
- /* Precalculate 1/2 minimum distance to other
- * sequences for each sequence
- */
- if (! simple_diffmx(aseq, nseq, &dmx))
- Die("simple_diffmx() failed");
- halfmin = MallocOrDie (sizeof(float) * nseq);
- for (idx = 0; idx < nseq; idx++)
- {
- for (min = 1.0, i = 0; i < nseq; i++)
- {
- if (i == idx) continue;
- if (dmx[idx][i] < min) min = dmx[idx][i];
- }
- halfmin[idx] = min / 2.0;
- }
- Free2DArray((void **) dmx, nseq);
-
- /* Set up the random sequence generating model.
- */
- psym = MallocOrDie (alen * sizeof(char *));
- nsym = MallocOrDie (alen * sizeof(int));
- for (acol = 0; acol < alen; acol++)
- psym[acol] = MallocOrDie (27 * sizeof(char));
-
-/* #ifdef ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */
- for (acol = 0; acol < alen; acol++)
- {
- memset(symseen, 0, sizeof(int) * 27);
- for (idx = 0; idx < nseq; idx++)
- if (! isgap(aseq[idx][acol]))
- {
- if (isupper((int) aseq[idx][acol]))
- symidx = aseq[idx][acol] - 'A';
- else
- symidx = aseq[idx][acol] - 'a';
- if (symidx >= 0 && symidx < 26)
- symseen[symidx] = 1;
- }
- else
- symseen[26] = 1; /* a gap */
-
- for (nsym[acol] = 0, i = 0; i < 26; i++)
- if (symseen[i])
- {
- psym[acol][nsym[acol]] = 'A'+i;
- nsym[acol]++;
- }
- if (symseen[26]) { psym[acol][nsym[acol]] = ' '; nsym[acol]++; }
- }
-/* #endif ORIGINAL_SIBBALD_ALGORITHM_IS_BROKEN */
-
- /* Note: the original Sibbald&Argos algorithm calls for
- * bounding the sampled space using a template-like random
- * sequence generator. However, this leads to one minor
- * and one major problem. The minor problem is that
- * exceptional amino acids in a column can have a
- * significant effect by altering the amount of sampled
- * sequence space; the larger the data set, the worse
- * this problem becomes. The major problem is that
- * there is no reasonable way to deal with gaps.
- * Gapped sequences simply inhabit a different dimensionality
- * and it's pretty painful to imagine calculating Voronoi
- * volumes when the N in your N-space is varying.
- * Note that all the examples shown by Sibbald and Argos
- * are *ungapped* examples.
- *
- * The best way I've found to circumvent this problem is
- * just not to bound the sampled space; count gaps as
- * symbols and generate completely random sequences.
- */
-#ifdef ALL_SEQUENCE_SPACE
- for (acol = 0; acol < alen; acol++)
- {
- strcpy(psym[acol], "ACDEFGHIKLMNPQRSTVWY ");
- nsym[acol] = 21;
- }
-#endif
-
- /* Sibbald and Argos algorithm:
- * 1) assign all seqs weight 0.
- * 2) generate a "random" sequence
- * 3) calculate distance to every other sequence
- * (if we get a distance < 1/2 minimum distance
- * to other real seqs, we can stop)
- * 4) if unique closest sequence, increment its weight 1.
- * if multiple closest seq, choose one randomly
- * 5) repeat 2-4 for lots of iterations
- * 6) normalize all weights to sum to nseq.
- */
- randseq = MallocOrDie ((alen+1) * sizeof(char));
-
- best = 42.; /* solely to silence GCC uninit warnings. */
- FSet(wgt, nseq, 0.0);
- for (iteration = 0; iteration < itscale * nseq; iteration++)
- {
- for (acol = 0; acol < alen; acol++)
- randseq[acol] = (nsym[acol] == 0) ? ' ' : psym[acol][CHOOSE(nsym[acol])];
- randseq[acol] = '\0';
-
- champion = sre_random();
- for (min = 1.0, idx = 0; idx < nseq; idx++)
- {
- dist = simple_distance(aseq[idx], randseq);
- if (dist < halfmin[idx])
- {
- best = idx;
- break;
- }
- if (dist < min)
- { champion = sre_random(); best = idx; min = dist; }
- else if (dist == min)
- {
- challenge = sre_random();
- if (challenge > champion)
- { champion = challenge; best = idx; min = dist; }
- }
- }
- wgt[best] += 1.0;
- }
-
- for (idx = 0; idx < nseq; idx++)
- wgt[idx] = wgt[idx] / (float) itscale;
-
- free(randseq);
- free(nsym);
- free(halfmin);
- Free2DArray((void **) psym, alen);
-}
-
-
-/* Function: simple_distance()
- *
- * Purpose: For two identical-length null-terminated strings, return
- * the fractional difference between them. (0..1)
- * (Gaps don't count toward anything.)
- */
-static float
-simple_distance(char *s1, char *s2)
-{
- int diff = 0;
- int valid = 0;
-
- for (; *s1 != '\0'; s1++, s2++)
- {
- if (isgap(*s1) || isgap(*s2)) continue;
- if (*s1 != *s2) diff++;
- valid++;
- }
- return (valid > 0 ? ((float) diff / (float) valid) : 0.0);
-}
-
-/* Function: simple_diffmx()
- *
- * Purpose: Given a set of flushed, aligned sequences, construct
- * an NxN fractional difference matrix using the
- * simple_distance rule.
- *
- * Args: aseqs - flushed, aligned sequences
- * num - number of aseqs
- * ret_dmx - RETURN: difference matrix (caller must free)
- *
- * Return: 1 on success, 0 on failure.
- */
-static int
-simple_diffmx(char **aseqs,
- int num,
- float ***ret_dmx)
-{
- float **dmx; /* RETURN: distance matrix */
- int i,j; /* counters over sequences */
-
- /* Allocate
- */
- if ((dmx = (float **) malloc (sizeof(float *) * num)) == NULL)
- Die("malloc failed");
- for (i = 0; i < num; i++)
- if ((dmx[i] = (float *) malloc (sizeof(float) * num)) == NULL)
- Die("malloc failed");
-
- /* Calculate distances, symmetric matrix
- */
- for (i = 0; i < num; i++)
- for (j = i; j < num; j++)
- dmx[i][j] = dmx[j][i] = simple_distance(aseqs[i], aseqs[j]);
-
- /* Return
- */
- *ret_dmx = dmx;
- return 1;
-}
-
-
-
-/* Function: BlosumWeights()
- * Date: SRE, Fri Jul 16 17:33:59 1999 (St. Louis)
- *
- * Purpose: Assign weights to a set of aligned sequences
- * using the BLOSUM rule:
- * - do single linkage clustering at some pairwise identity
- * - in each cluster, give each sequence 1/clustsize
- * total weight.
- *
- * The clusters have no pairwise link >= maxid.
- *
- * O(N) in memory. Probably ~O(NlogN) in time; O(N^2)
- * in worst case, which is no links between sequences
- * (e.g., values of maxid near 1.0).
- *
- * Args: aseqs - alignment
- * nseq - number of seqs in alignment
- * alen - # of columns in alignment
- * maxid - fractional identity (e.g. 0.62 for BLOSUM62)
- * wgt - [0..nseq-1] array of weights to be returned
- */
-void
-BlosumWeights(char **aseqs, int nseq, int alen, float maxid, float *wgt)
-{
- int *c, nc;
- int *nmem; /* number of seqs in each cluster */
- int i; /* loop counter */
-
- SingleLinkCluster(aseqs, nseq, alen, maxid, &c, &nc);
-
- FSet(wgt, nseq, 1.0);
- nmem = MallocOrDie(sizeof(int) * nc);
-
- for (i = 0; i < nc; i++) nmem[i] = 0;
- for (i = 0; i < nseq; i++) nmem[c[i]]++;
- for (i = 0; i < nseq; i++) wgt[i] = 1. / (float) nmem[c[i]];
-
- free(nmem);
- free(c);
- return;
-}
-
-
-/* Function: PositionBasedWeights()
- * Date: SRE, Fri Jul 16 17:47:22 1999 [St. Louis]
- *
- * Purpose: Implementation of Henikoff and Henikoff position-based
- * weights (JMB 243:574-578, 1994) [Henikoff94b].
- *
- * A significant advantage of this approach that Steve and Jorja
- * don't point out is that it is O(N) in memory, unlike
- * many other approaches like GSC weights or Voronoi.
- *
- * A potential disadvantage that they don't point out
- * is that in the theoretical limit of infinite sequences
- * in the alignment, weights go flat: eventually every
- * column has at least one representative of each of 20 aa (or 4 nt)
- * in it.
- *
- * They also don't give a rule for how to handle gaps.
- * The rule used here seems the obvious and sensible one
- * (ignore them). This means that longer sequences
- * initially get more weight; hence a "double
- * normalization" in which the weights are first divided
- * by sequence length (to compensate for that effect),
- * then normalized to sum to nseq.
- *
- * Limitations:
- * Implemented in a way that's alphabet-independent:
- * it uses the 26 upper case letters as "residues".
- * Any alphabetic character in aseq is interpreted as
- * a unique "residue" (case insensitively; lower case
- * mapped to upper case). All other characters are
- * interpreted as gaps.
- *
- * This way, we don't have to pass around any alphabet
- * type info (DNA vs. RNA vs. protein) and don't have
- * to deal with remapping IUPAC degenerate codes
- * probabilistically. However, on the down side,
- * a sequence with a lot of degenerate IUPAC characters
- * will get an artifactually high PB weight.
- *
- * Args: aseq - sequence alignment to weight
- * nseq - number of sequences in alignment
- * alen - length of alignment
- * wgt - RETURN: weights filled in (pre-allocated 0..nseq-1)
- *
- * Returns: (void)
- * wgt is allocated (0..nseq-1) by caller, and filled in here.
- */
-void
-PositionBasedWeights(char **aseq, int nseq, int alen, float *wgt)
-{
- int rescount[26]; /* count of A-Z residues in a column */
- int nres; /* number of different residues in col */
- int idx, pos; /* indices into aseq */
- int x;
- float norm;
-
- FSet(wgt, nseq, 0.0);
- for (pos = 0; pos < alen; pos++)
- {
- for (x = 0; x < 26; x++) rescount[x] = 0;
- for (idx = 0; idx < nseq; idx++)
- if (isalpha((int) aseq[idx][pos]))
- rescount[toupper((int) aseq[idx][pos]) - 'A'] ++;
-
- nres = 0;
- for (x = 0; x < 26; x++)
- if (rescount[x] > 0) nres++;
-
- for (idx = 0; idx < nseq; idx++)
- if (isalpha((int) aseq[idx][pos]))
- wgt[idx] += 1. / (float) (nres * rescount[toupper((int) aseq[idx][pos]) - 'A']);
- }
-
- for (idx = 0; idx < nseq; idx++)
- wgt[idx] /= (float) DealignedLength(aseq[idx]);
- norm = (float) nseq / FSum(wgt, nseq);
- FScale(wgt, nseq, norm);
- return;
-}
-
-
-
-
-/* Function: FilterAlignment()
- * Date: SRE, Wed Jun 30 09:19:30 1999 [St. Louis]
- *
- * Purpose: Constructs a new alignment by removing near-identical
- * sequences from a given alignment (where identity is
- * calculated *based on the alignment*).
- * Does not affect the given alignment.
- * Keeps earlier sequence, discards later one.
- *
- * Usually called as an ad hoc sequence "weighting" mechanism.
- *
- * Limitations:
- * Unparsed Stockholm markup is not propagated into the
- * new alignment.
- *
- * Args: msa -- original alignment
- * cutoff -- fraction identity cutoff. 0.8 removes sequences > 80% id.
- * ret_new -- RETURN: new MSA, usually w/ fewer sequences
- *
- * Return: (void)
- * ret_new must be free'd by caller: MSAFree().
- */
-void
-FilterAlignment(MSA *msa, float cutoff, MSA **ret_new)
-{
- int nnew; /* number of seqs in new alignment */
- int *list;
- int *useme;
- float ident;
- int i,j;
- int remove;
-
- /* find which seqs to keep (list) */
- /* diff matrix; allow ragged ends */
- list = MallocOrDie (sizeof(int) * msa->nseq);
- useme = MallocOrDie (sizeof(int) * msa->nseq);
- for (i = 0; i < msa->nseq; i++) useme[i] = FALSE;
-
- nnew = 0;
- for (i = 0; i < msa->nseq; i++)
- {
- remove = FALSE;
- for (j = 0; j < nnew; j++)
- {
- ident = PairwiseIdentity(msa->aseq[i], msa->aseq[list[j]]);
- if (ident > cutoff)
- {
- remove = TRUE;
- printf("removing %12s -- fractional identity %.2f to %s\n",
- msa->sqname[i], ident,
- msa->sqname[list[j]]);
- break;
- }
- }
- if (remove == FALSE) {
- list[nnew++] = i;
- useme[i] = TRUE;
- }
- }
-
- MSASmallerAlignment(msa, useme, ret_new);
- free(list);
- free(useme);
- return;
-}
-
-
-/* Function: SampleAlignment()
- * Date: SRE, Wed Jun 30 10:13:56 1999 [St. Louis]
- *
- * Purpose: Constructs a new, smaller alignment by sampling a given
- * number of sequences at random. Does not change the
- * alignment nor the order of the sequences.
- *
- * If you ask for a sample that is larger than nseqs,
- * it silently returns the original alignment.
- *
- * Not really a weighting method, but this is as good
- * a place as any to keep it, since it's similar in
- * construction to FilterAlignment().
- *
- * Args: msa -- original alignment
- * sample -- number of sequences in new alignment (0 < sample <= nseq)
- * ret_new -- RETURN: new MSA
- *
- * Return: (void)
- * ret_new must be free'd by caller: MSAFree().
- */
-void
-SampleAlignment(MSA *msa, int sample, MSA **ret_new)
-{
- int *list; /* array for random selection w/o replace */
- int *useme; /* array of flags 0..nseq-1: TRUE to use */
- int i, idx;
- int len;
-
- /* Allocations
- */
- list = (int *) MallocOrDie (sizeof(int) * msa->nseq);
- useme = (int *) MallocOrDie (sizeof(int) * msa->nseq);
- for (i = 0; i < msa->nseq; i++)
- {
- list[i] = i;
- useme[i] = FALSE;
- }
-
- /* Sanity check.
- */
- if (sample >= msa->nseq) sample = msa->nseq;
-
- /* random selection w/o replacement */
- for (len = msa->nseq, i = 0; i < sample; i++)
- {
- idx = CHOOSE(len);
- printf("chose %d: %s\n", list[idx], msa->sqname[list[idx]]);
- useme[list[idx]] = TRUE;
- list[idx] = list[--len];
- }
-
- MSASmallerAlignment(msa, useme, ret_new);
- free(list);
- free(useme);
- return;
-}
-
-
-/* Function: SingleLinkCluster()
- * Date: SRE, Fri Jul 16 15:02:57 1999 [St. Louis]
- *
- * Purpose: Perform simple single link clustering of seqs in a
- * sequence alignment. A pairwise identity threshold
- * defines whether two sequences are linked or not.
- *
- * Important: runs in O(N) memory, unlike standard
- * graph decomposition algorithms that use O(N^2)
- * adjacency matrices or adjacency lists. Requires
- * O(N^2) time in worst case (which is when you have
- * no links at all), O(NlogN) in "average"
- * case, and O(N) in best case (when there is just
- * one cluster in a completely connected graph.
- *
- * (Developed because hmmbuild could no longer deal
- * with GP120, a 16,013 sequence alignment.)
- *
- * Limitations:
- * CASE-SENSITIVE. Assumes aseq have been put into
- * either all lower or all upper case; or at least,
- * within a column, there's no mixed case.
- *
- * Algorithm:
- * I don't know if this algorithm is published. I
- * haven't seen it in graph theory books, but that might
- * be because it's so obvious that nobody's bothered.
- *
- * In brief, we're going to do a breadth-first search
- * of the graph, and we're going to calculate links
- * on the fly rather than precalculating them into
- * some sort of standard adjacency structure.
- *
- * While working, we keep two stacks of maximum length N:
- * a : list of vertices that are still unconnected.
- * b : list of vertices that we've connected to
- * in our current breadth level, but we haven't
- * yet tested for other connections to a.
- * The current length (number of elements in) a and b are
- * kept in na, nb.
- *
- * We store our results in an array of length N:
- * c : assigns each vertex to a component. for example
- * c[4] = 1 means that vertex 4 is in component 1.
- * nc is the number of components. Components
- * are numbered from 0 to nc-1. We return c and nc
- * to our caller.
- *
- * The algorithm is:
- *
- * Initialisation:
- * a <-- all the vertices
- * na <-- N
- * b <-- empty set
- * nb <-- 0
- * nc <-- 0
- *
- * Then:
- * while (a is not empty)
- * pop a vertex off a, push onto b
- * while (b is not empty)
- * pop vertex v off b
- * assign c[v] = nc
- * for each vertex w in a:
- * compare v,w. If w is linked to v, remove w
- * from a, push onto b.
- * nc++
- * q.e.d. :)
- *
- * Args: aseq - aligned sequences
- * nseq - number of sequences in aseq
- * alen - alignment length
- * maxid - fractional identity threshold 0..1. if id >= maxid, seqs linked
- * ret_c - RETURN: 0..nseq-1 assignments of seqs to components (clusters)
- * ret_nc - RETURN: number of components
- *
- * Returns: void.
- * ret_c is allocated here. Caller free's with free(*ret_c)
- */
-void
-SingleLinkCluster(char **aseq, int nseq, int alen, float maxid,
- int **ret_c, int *ret_nc)
-{
- int *a, na; /* stack of available vertices */
- int *b, nb; /* stack of working vertices */
- int *c; /* array of results */
- int nc; /* total number of components */
- int v,w; /* index of a working vertices */
- int i; /* loop counter */
-
- /* allocations and initializations
- */
- a = MallocOrDie (sizeof(int) * nseq);
- b = MallocOrDie (sizeof(int) * nseq);
- c = MallocOrDie (sizeof(int) * nseq);
- for (i = 0; i < nseq; i++) a[i] = i;
- na = nseq;
- nb = 0;
- nc = 0;
-
- /* Main algorithm
- */
- while (na > 0)
- {
- v = a[na-1]; na--; /* pop a vertex off a, */
- b[nb] = v; nb++; /* and push onto b */
- while (nb > 0)
- {
- v = b[nb-1]; nb--; /* pop vertex off b */
- c[v] = nc; /* assign it to component nc */
- for (i = na-1; i >= 0; i--)/* backwards, becase of deletion/swapping we do*/
- if (simple_distance(aseq[v], aseq[a[i]]) <= 1. - maxid) /* linked? */
- {
- w = a[i]; a[i] = a[na-1]; na--; /* delete w from a (note swap) */
- b[nb] = w; nb++; /* push w onto b */
- }
- }
- nc++;
- }
-
- /* Cleanup and return
- */
- free(a);
- free(b);
- *ret_c = c;
- *ret_nc = nc;
- return;
-}
diff --git a/squid/weight_main.c b/squid/weight_main.c
deleted file mode 100644
index 6a3d127..0000000
--- a/squid/weight_main.c
+++ /dev/null
@@ -1,189 +0,0 @@
-/*****************************************************************
- * HMMER - Biological sequence analysis with profile HMMs
- * Copyright (C) 1992-2003 Washington University School of Medicine
- * All Rights Reserved
- *
- * This source code is distributed under the terms of the
- * GNU General Public License. See the files COPYING and LICENSE
- * for details.
- *****************************************************************/
-
-/* weight_main.c
- * SRE, Thu Mar 3 13:43:39 1994
- *
- * Calculate weights for a sequence alignment.
- * CVS $Id: weight_main.c,v 1.6 2003/04/14 16:00:16 eddy Exp $
- */
-
-#include "squidconf.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-#include "squid.h"
-#include "msa.h"
-
-static char banner[] = "weight - calculate sequence weights for an alignment";
-
-/* Basic help: printed for -h, and passed to Die() with command-line errors. */
-static char usage[] = "\
-Usage: weight [-options] <alignment file>\n\
- Available options:\n\
- -b <f> : use BLOSUM weighting scheme at <f> fractional identity\n\
- -f <f> : filter out seqs w/ fractional ident > <f> [0-1]\n\
- -h : help; print version and usage info\n\
- -o <file> : save weight-annotated alignment in <file>\n\
- -p : use position based weight scheme (Henikoff & Henikoff)\n\
- -s <n> : sample <n> sequences at random into a new alignment\n\
- -v : use Voronoi weight scheme (Sibbald & Argos) \n";
-
-static char experts[] = "\
- Expert options:\n\
- --informat <s> : specify alignment file format <s>\n\
- allowed formats: SELEX, MSF, Clustal, a2m, PHYLIP\n\
- --quiet : suppress verbose banner\n";
-
-/* Table handed to Getopt(): name, TRUE on the "-x" rows / FALSE on the "--" rows, argument type. */
-static struct opt_s OPTIONS[] = {
-  { "-b", TRUE, sqdARG_FLOAT },
-  { "-f", TRUE, sqdARG_FLOAT },
-  { "-h", TRUE, sqdARG_NONE },
-  { "-o", TRUE, sqdARG_STRING },
-  { "-p", TRUE, sqdARG_NONE },
-  { "-s", TRUE, sqdARG_INT },
-  { "-v", TRUE, sqdARG_NONE },
-  { "--informat", FALSE, sqdARG_STRING },
-  { "--quiet", FALSE, sqdARG_NONE },
-};
-#define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
-
-/* main(): parse the command line, then for every alignment read from the
- * input file write a Stockholm alignment to the output: the input with
- * weights set, or (-f / -s) the filtered or sampled one when it is produced.
- */
-int
-main(int argc, char **argv)
-{
-  char    *seqfile;             /* file containing aligned seqs */
-  MSAFILE *afp;                 /* pointer to open alignment file */
-  MSA     *msa;                 /* multiple sequence alignment */
-  int      fmt;                 /* expected format of alignment file */
-  int      idx;                 /* index over the sequences of one alignment */
-  char    *outfile;             /* output file for weighted alignment */
-  FILE    *ofp;                 /* open outfile */
-
-  int      do_voronoi;          /* use Sibbald/Argos Voronoi scheme */
-  int      do_blosum;           /* use BLOSUM weighting scheme */
-  int      do_pbased;           /* use position-based weights */
-  int      do_filter;           /* use filtering scheme */
-  float    idlevel;             /* identity level to filter at, [0-1] */
-  int      samplesize;          /* if >0, don't weight, random sample */
-  int      be_quiet;            /* TRUE to suppress banner */
-
-  char    *optname;             /* name of option found by Getopt() */
-  char    *optarg;              /* argument found by Getopt() */
-  int      optind;              /* index in argv[] */
-
-  /* Parse command line */
-
-  fmt        = MSAFILE_UNKNOWN; /* autodetect file format by default */
-  outfile    = NULL;
-  do_blosum  = FALSE;
-  do_voronoi = FALSE;
-  do_pbased  = FALSE;
-  do_filter  = FALSE;
-  samplesize = 0;
-  be_quiet   = FALSE;
-  idlevel    = 0.;              /* just to suppress gcc uninit warnings */
-
-  while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
-                &optind, &optname, &optarg))
-    {
-      if      (strcmp(optname, "-b") == 0) { do_blosum = TRUE; idlevel = atof(optarg); }
-      else if (strcmp(optname, "-f") == 0) { do_filter = TRUE; idlevel = atof(optarg); }
-      else if (strcmp(optname, "-o") == 0) outfile = optarg;
-      else if (strcmp(optname, "-p") == 0) do_pbased = TRUE;
-      else if (strcmp(optname, "-s") == 0) samplesize = atoi(optarg);
-      else if (strcmp(optname, "-v") == 0) do_voronoi = TRUE;
-      else if (strcmp(optname, "--quiet") == 0) be_quiet = TRUE;
-      else if (strcmp(optname, "--informat") == 0) {
-        fmt = String2SeqfileFormat(optarg);
-        if (fmt == MSAFILE_UNKNOWN)
-          Die("unrecognized sequence file format \"%s\"", optarg);
-        if (! IsAlignmentFormat(fmt))
-          Die("%s is an unaligned format, can't read as an alignment", optarg);
-      }
-      else if (strcmp(optname, "-h") == 0)
-        {
-          SqdBanner(stdout, banner);
-          puts(usage);
-          puts(experts);
-          exit(EXIT_SUCCESS);
-        }
-    }
-
-  if (argc - optind != 1)
-    Die("Wrong number of arguments specified on command line\n%s\n", usage);
-  seqfile = argv[optind];
-
-  if (outfile == NULL)
-    ofp = stdout;
-  else if ((ofp = fopen(outfile, "w")) == NULL)
-    Die("Failed to open alignment output file %s", outfile);
-
-  /* Count -s as one scheme when set; adding the raw sample size made any
-   * "-s <n>" with n > 1 trip this check on its own. */
-  if (do_voronoi + do_pbased + do_blosum + do_filter + (samplesize > 0) > 1)
-    Die("Choose only one weighting scheme, please.\n%s\n", usage);
-
-  if (do_voronoi || samplesize > 0)
-    sre_srandom(time(0));
-
-  if (! be_quiet)
-    SqdBanner(stdout, banner);
-
-  /* Open the input alignment file and start...
-   * be prepared to deal with multiple entries in Stockholm files */
-
-  if ((afp = MSAFileOpen(seqfile, fmt, NULL)) == NULL)
-    Die("Alignment file %s could not be opened for reading", seqfile);
-
-  while ((msa = MSAFileRead(afp)) != NULL)
-    {
-      for (idx = 0; idx < msa->nseq; idx++)
-        s2upper(msa->aseq[idx]);
-
-      if (do_filter || samplesize > 0)
-        {
-          MSA *new;
-
-          if (do_filter)
-            FilterAlignment(msa, idlevel, &new);
-          else if (samplesize > 0)
-            SampleAlignment(msa, samplesize, &new);
-
-          if (new != NULL) {
-            WriteStockholm(ofp, new);
-            MSAFree(new);
-          }
-          MSAFree(msa);         /* released even when no new alignment came back */
-        }
-      else
-        {
-          if (do_voronoi)     VoronoiWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
-          else if (do_blosum) BlosumWeights(msa->aseq, msa->nseq, msa->alen, idlevel, msa->wgt);
-          else if (do_pbased) PositionBasedWeights(msa->aseq, msa->nseq, msa->alen, msa->wgt);
-          else                GSCWeights (msa->aseq, msa->nseq, msa->alen, msa->wgt);
-
-          msa->flags |= MSA_SET_WGT;
-          WriteStockholm(ofp, msa);
-          MSAFree(msa);
-        }
-    }
-  MSAFileClose(afp);
-  fclose(ofp);
-  return EXIT_SUCCESS;
-}
-
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/hmmer2.git
More information about the debian-med-commit
mailing list